From e9ef88fbd299051df0bb8b344393c50cc1d159d8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 17 Jun 2012 18:18:16 +0300
Subject: [PATCH 01/14] rtpenc: Fix memory leaks in the muxer open function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Also return a proper error code in these cases.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtpenc.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavformat/rtpenc.c b/libavformat/rtpenc.c
index 71eeb7e32f..6752fb6f7e 100644
--- a/libavformat/rtpenc.c
+++ b/libavformat/rtpenc.c
@@ -198,11 +198,11 @@ static int rtp_write_header(AVFormatContext *s1)
         /* max_header_toc_size + the largest AMR payload must fit */
         if (1 + s->max_frames_per_packet + n > s->max_payload_size) {
             av_log(s1, AV_LOG_ERROR, "RTP max payload size too small for AMR\n");
-            return -1;
+            goto fail;
         }
         if (st->codec->channels != 1) {
             av_log(s1, AV_LOG_ERROR, "Only mono is supported\n");
-            return -1;
+            goto fail;
         }
     case CODEC_ID_AAC:
         s->num_frames = 0;
@@ -216,6 +216,10 @@ defaultcase:
     }
 
     return 0;
+
+fail:
+    av_freep(&s->buf);
+    return AVERROR(EINVAL);
 }
 
 /* send an rtcp sender report packet */

From 634e874de58b6a1ad50134bea79505e146b87d39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 17 Jun 2012 19:06:56 +0300
Subject: [PATCH 02/14] amr: More space cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This was missed in the previous cleanup patch.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/amr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/amr.c b/libavformat/amr.c
index 12b2b7fe58..efb5eddbd9 100644
--- a/libavformat/amr.c
+++ b/libavformat/amr.c
@@ -123,7 +123,7 @@ static int amr_read_packet(AVFormatContext *s, AVPacket *pkt)
         };
 
         size = packed_size[mode] + 1;
-    } else if(enc->codec_id == CODEC_ID_AMR_WB) {
+    } else if (enc->codec_id == CODEC_ID_AMR_WB) {
         static uint8_t packed_size[16] = {
             18, 24, 33, 37, 41, 47, 51, 59, 61, 6, 6, 0, 0, 0, 1, 1
         };

From eb3918c1fd9a363111457285002eaebbaaa18eed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 17 Jun 2012 19:08:23 +0300
Subject: [PATCH 03/14] amr: Mark an array const
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/amr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libavformat/amr.c b/libavformat/amr.c
index efb5eddbd9..f20bc0aa69 100644
--- a/libavformat/amr.c
+++ b/libavformat/amr.c
@@ -124,7 +124,7 @@ static int amr_read_packet(AVFormatContext *s, AVPacket *pkt)
 
         size = packed_size[mode] + 1;
     } else if (enc->codec_id == CODEC_ID_AMR_WB) {
-        static uint8_t packed_size[16] = {
+        static const uint8_t packed_size[16] = {
             18, 24, 33, 37, 41, 47, 51, 59, 61, 6, 6, 0, 0, 0, 1, 1
         };
 

From eb564b23a3768edea1d8c2d20439e6a4fdea2747 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 17 Jun 2012 21:15:32 +0300
Subject: [PATCH 04/14] http: Fail reading if the connection has gone away
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This can happen when a new request is made on the same socket and
that request fails, which clears the URLContext.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/http.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/libavformat/http.c b/libavformat/http.c
index b2f2ea97f2..2b5f2cc53b 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c
@@ -510,6 +510,9 @@ static int http_read(URLContext *h, uint8_t *buf, int size)
     HTTPContext *s = h->priv_data;
     int err, new_location;
 
+    if (!s->hd)
+        return AVERROR_EOF;
+
     if (s->end_chunked_post) {
         if (!s->end_header) {
             err = http_read_header(h, &new_location);

From 35127bf156df09ebf43f1ad7ea236653f7ba7707 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Sun, 17 Jun 2012 21:19:41 +0300
Subject: [PATCH 05/14] http: Properly handle chunked transfer-encoding for
 replies to post data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/http.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/libavformat/http.c b/libavformat/http.c
index 2b5f2cc53b..a4941937dd 100644
--- a/libavformat/http.c
+++ b/libavformat/http.c
@@ -353,6 +353,8 @@ static int http_read_header(URLContext *h, int *new_location)
     char line[1024];
     int err = 0;
 
+    s->chunksize = -1;
+
     for (;;) {
         if ((err = http_get_line(s, line, sizeof(line))) < 0)
             return err;
@@ -470,7 +472,6 @@ static int http_connect(URLContext *h, const char *path, const char *local_path,
         s->http_code = 200;
         return 0;
     }
-    s->chunksize = -1;
 
     /* wait for header */
     err = http_read_header(h, new_location);
@@ -513,14 +514,10 @@ static int http_read(URLContext *h, uint8_t *buf, int size)
     if (!s->hd)
         return AVERROR_EOF;
 
-    if (s->end_chunked_post) {
-        if (!s->end_header) {
-            err = http_read_header(h, &new_location);
-            if (err < 0)
-                return err;
-        }
-
-        return http_buf_read(h, buf, size);
+    if (s->end_chunked_post && !s->end_header) {
+        err = http_read_header(h, &new_location);
+        if (err < 0)
+            return err;
     }
 
     if (s->chunksize >= 0) {

From 8e50c57dcb481479f2fd46f9bdb6a9776b0d9fa6 Mon Sep 17 00:00:00 2001
From: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Date: Sun, 17 Jun 2012 20:24:43 +0200
Subject: [PATCH 06/14] RTMPT protocol support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds two protocols, but one of them is an internal implementation
detail just used as an abstraction layer/generalization in the code. The
RTMPT protocol implementation uses rtmphttp:// as an alternative to the
tcp:// protocol. This allows moving most of the lower level logic out
from the higher level generic rtmp code.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 Changelog                |   1 +
 configure                |   4 +
 doc/protocols.texi       |   8 ++
 libavformat/Makefile     |   2 +
 libavformat/allformats.c |   2 +
 libavformat/rtmphttp.c   | 239 +++++++++++++++++++++++++++++++++++++++
 libavformat/rtmpproto.c  |  30 ++++-
 libavformat/version.h    |   4 +-
 8 files changed, 285 insertions(+), 5 deletions(-)
 create mode 100644 libavformat/rtmphttp.c

diff --git a/Changelog b/Changelog
index b80ff8885e..4288aa3cc5 100644
--- a/Changelog
+++ b/Changelog
@@ -25,6 +25,7 @@ version <next>:
   be used with -of old.
 - Indeo Audio decoder
 - channelsplit audio filter
+- RTMPT protocol support
 
 
 version 0.8:
diff --git a/configure b/configure
index 4bb20301e7..d614366b2d 100755
--- a/configure
+++ b/configure
@@ -1511,6 +1511,10 @@ mmsh_protocol_select="http_protocol"
 mmst_protocol_deps="network"
 rtmp_protocol_deps="!librtmp_protocol"
 rtmp_protocol_select="tcp_protocol"
+rtmphttp_protocol_deps="!librtmp_protocol"
+rtmphttp_protocol_select="http_protocol"
+rtmpt_protocol_deps="!librtmp_protocol"
+rtmpt_protocol_select="rtmphttp_protocol"
 rtp_protocol_select="udp_protocol"
 sctp_protocol_deps="network netinet_sctp_h"
 tcp_protocol_deps="network"
diff --git a/doc/protocols.texi b/doc/protocols.texi
index 84920332f6..0b4f1b1772 100644
--- a/doc/protocols.texi
+++ b/doc/protocols.texi
@@ -243,6 +243,14 @@ For example to read with @command{avplay} a multimedia resource named
 avplay rtmp://myserver/vod/sample
 @end example
 
+@section rtmpt
+
+Real-Time Messaging Protocol tunneled through HTTP.
+
+The Real-Time Messaging Protocol tunneled through HTTP (RTMPT) is used
+for streaming multimedia content within HTTP requests to traverse
+firewalls.
+
 @section rtmp, rtmpe, rtmps, rtmpt, rtmpte
 
 Real-Time Messaging Protocol and its variants supported through
diff --git a/libavformat/Makefile b/libavformat/Makefile
index ca4f7a02e1..6262324830 100644
--- a/libavformat/Makefile
+++ b/libavformat/Makefile
@@ -345,6 +345,8 @@ OBJS-$(CONFIG_MMST_PROTOCOL)             += mmst.o mms.o asf.o
 OBJS-$(CONFIG_MD5_PROTOCOL)              += md5proto.o
 OBJS-$(CONFIG_PIPE_PROTOCOL)             += file.o
 OBJS-$(CONFIG_RTMP_PROTOCOL)             += rtmpproto.o rtmppkt.o
+OBJS-$(CONFIG_RTMPHTTP_PROTOCOL)         += rtmphttp.o
+OBJS-$(CONFIG_RTMPT_PROTOCOL)            += rtmpproto.o rtmppkt.o
 OBJS-$(CONFIG_RTP_PROTOCOL)              += rtpproto.o
 OBJS-$(CONFIG_SCTP_PROTOCOL)             += sctp.o
 OBJS-$(CONFIG_TCP_PROTOCOL)              += tcp.o
diff --git a/libavformat/allformats.c b/libavformat/allformats.c
index 1320a28ac6..42c588f294 100644
--- a/libavformat/allformats.c
+++ b/libavformat/allformats.c
@@ -256,6 +256,8 @@ void av_register_all(void)
     REGISTER_PROTOCOL (MD5,  md5);
     REGISTER_PROTOCOL (PIPE, pipe);
     REGISTER_PROTOCOL (RTMP, rtmp);
+    REGISTER_PROTOCOL (RTMPHTTP, rtmphttp);
+    REGISTER_PROTOCOL (RTMPT, rtmpt);
     REGISTER_PROTOCOL (RTP, rtp);
     REGISTER_PROTOCOL (SCTP, sctp);
     REGISTER_PROTOCOL (TCP, tcp);
diff --git a/libavformat/rtmphttp.c b/libavformat/rtmphttp.c
new file mode 100644
index 0000000000..fdcff50bed
--- /dev/null
+++ b/libavformat/rtmphttp.c
@@ -0,0 +1,239 @@
+/*
+ * RTMP HTTP network protocol
+ * Copyright (c) 2012 Samuel Pitoiset
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * RTMP HTTP protocol
+ */
+
+#include "libavutil/avstring.h"
+#include "libavutil/intfloat.h"
+#include "libavutil/opt.h"
+#include "internal.h"
+#include "http.h"
+
+#define RTMPT_DEFAULT_PORT 80
+
+/* Private context of the rtmphttp protocol handler (RTMP tunneled over HTTP POSTs). */
+typedef struct RTMP_HTTPContext {
+    URLContext   *stream;           ///< underlying HTTP stream, reused for every request
+    char         host[256];         ///< hostname of the server, parsed from the URI
+    int          port;              ///< port to connect to (RTMPT_DEFAULT_PORT if the URI has none)
+    char         client_id[64];     ///< ID from the server's "/open/1" reply, used in all later request URLs
+    int          seq;               ///< sequence number appended to request URLs, incremented per request
+    uint8_t      *out_data;         ///< data buffered by writes until the next POST
+    int          out_size;          ///< number of bytes currently buffered in out_data
+    int          out_capacity;      ///< allocated size of out_data in bytes
+    int          initialized;       ///< set once the session was opened successfully
+    int          finishing;         ///< set while closing, so that reads stop issuing new requests
+} RTMP_HTTPContext;
+
+/* POST the buffered output as "/<cmd>/<client_id>/<seq>"; returns < 0 on error. */
+static int rtmp_http_send_cmd(URLContext *h, const char *cmd)
+{
+    RTMP_HTTPContext *rt = h->priv_data;
+    char uri[2048];
+    uint8_t c;
+    int ret;
+
+    ff_url_join(uri, sizeof(uri), "http", NULL, rt->host, rt->port,
+                "/%s/%s/%d", cmd, rt->client_id, rt->seq++);
+
+    /* do not send the request if its body could not be set */
+    if ((ret = av_opt_set_bin(rt->stream->priv_data, "post_data",
+                              rt->out_data, rt->out_size, 0)) < 0)
+        return ret;
+
+    /* send a new request to the server */
+    if ((ret = ff_http_do_new_request(rt->stream, uri)) < 0)
+        return ret;
+
+    /* re-init output buffer */
+    rt->out_size = 0;
+
+    /* read (and drop) the first byte which contains the polling interval */
+    return ffurl_read(rt->stream, &c, 1);
+}
+
+/* Buffer size bytes from buf for the next POST; returns size or AVERROR(ENOMEM). */
+static int rtmp_http_write(URLContext *h, const uint8_t *buf, int size)
+{
+    RTMP_HTTPContext *rt = h->priv_data;
+
+    if (rt->out_size + size > rt->out_capacity) {
+        /* commit the new capacity only once the reallocation succeeded */
+        int new_capacity = (rt->out_size + size) * 2;
+        void *ptr        = av_realloc(rt->out_data, new_capacity);
+        if (!ptr)
+            return AVERROR(ENOMEM);
+        rt->out_data     = ptr;
+        rt->out_capacity = new_capacity;
+    }
+    memcpy(rt->out_data + rt->out_size, buf, size);
+    rt->out_size += size;
+    return size;
+}
+
+static int rtmp_http_read(URLContext *h, uint8_t *buf, int size)
+{
+    RTMP_HTTPContext *rt = h->priv_data;
+    int ret, off = 0;
+    /* Returns the number of bytes read into buf (> 0) or a negative AVERROR code. */
+    /* try to read at least 1 byte of data */
+    do {
+        ret = ffurl_read(rt->stream, buf + off, size);
+        if (ret < 0 && ret != AVERROR_EOF)
+            return ret; /* real error, pass it on */
+
+        if (ret == AVERROR_EOF) {
+            if (rt->finishing) {
+                /* Do not send new requests when the client wants to
+                 * close the connection. */
+                return AVERROR(EAGAIN);
+            }
+
+            /* When the client has reached end of file for the last request,
+             * we have to send a new request if we have buffered data.
+             * Otherwise, we have to send an idle POST. */
+            if (rt->out_size > 0) {
+                if ((ret = rtmp_http_send_cmd(h, "send")) < 0)
+                    return ret;
+            } else {
+                if ((ret = rtmp_http_write(h, "", 1)) < 0) /* queue a 1-byte body */
+                    return ret;
+
+                if ((ret = rtmp_http_send_cmd(h, "idle")) < 0)
+                    return ret;
+            }
+
+            if (h->flags & AVIO_FLAG_NONBLOCK) {
+                /* no incoming data to handle in nonblocking mode */
+                return AVERROR(EAGAIN);
+            }
+        } else {
+            off  += ret; /* advance past the bytes just read */
+            size -= ret;
+        }
+    } while (off <= 0);
+
+    return off;
+}
+
+static int rtmp_http_close(URLContext *h)
+{
+    RTMP_HTTPContext *rt = h->priv_data;
+    uint8_t tmp_buf[2048];
+    int ret = 0;
+    /* Send "close" for an opened session, then free the buffer and the HTTP stream. */
+    if (rt->initialized) {
+        /* client wants to close the connection */
+        rt->finishing = 1;
+
+        do { /* read into a scratch buffer that is never used afterwards */
+            ret = rtmp_http_read(h, tmp_buf, sizeof(tmp_buf));
+        } while (ret > 0);
+
+        /* re-init output buffer before sending the close command */
+        rt->out_size = 0;
+
+        if ((ret = rtmp_http_write(h, "", 1)) == 1) /* 1-byte body for the close POST */
+            ret = rtmp_http_send_cmd(h, "close");
+    }
+    /* also runs for a failed rtmp_http_open(), where initialized is still 0 */
+    av_freep(&rt->out_data);
+    ffurl_close(rt->stream);
+
+    return ret;
+}
+
+/* Open the tunnel: POST "/open/1" and keep the ID from the reply as client_id. */
+static int rtmp_http_open(URLContext *h, const char *uri, int flags)
+{
+    RTMP_HTTPContext *rt = h->priv_data;
+    char url[1024];
+    int ret, off = 0;
+
+    av_url_split(NULL, 0, NULL, 0, rt->host, sizeof(rt->host), &rt->port,
+                 NULL, 0, uri);
+    if (rt->port < 0)
+        rt->port = RTMPT_DEFAULT_PORT;
+
+    /* This is the first request that is sent to the server in order to
+     * register a client on the server and start a new session. The server
+     * replies with a unique id (usually a number) that is used by the client
+     * for all future requests.
+     * Note: the reply doesn't contain a value for the polling interval.
+     * A successful connect resets the consecutive index that is used
+     * in the URLs. */
+    ff_url_join(url, sizeof(url), "http", NULL, rt->host, rt->port, "/open/1");
+
+    /* alloc the http context, passing on the caller's interrupt callback */
+    if ((ret = ffurl_alloc(&rt->stream, url, AVIO_FLAG_READ_WRITE,
+                           &h->interrupt_callback)) < 0)
+        goto fail;
+
+    av_opt_set(rt->stream->priv_data, "headers",
+               "Cache-Control: no-cache\r\n"
+               "Content-type: application/x-fcs\r\n"
+               "User-Agent: Shockwave Flash\r\n", 0);
+    av_opt_set(rt->stream->priv_data, "multiple_requests", "1", 0);
+    av_opt_set_bin(rt->stream->priv_data, "post_data", "", 1, 0);
+
+    /* open the http context */
+    if ((ret = ffurl_connect(rt->stream, NULL)) < 0)
+        goto fail;
+
+    /* read the server reply which contains a unique ID */
+    for (;;) {
+        ret = ffurl_read(rt->stream, rt->client_id + off, sizeof(rt->client_id) - off);
+        if (ret == AVERROR_EOF)
+            break;
+        if (ret < 0)
+            goto fail;
+        off += ret;
+        if (off == sizeof(rt->client_id)) {
+            ret = AVERROR(EIO);
+            goto fail;
+        }
+    }
+    /* the reply is untrusted; isspace() needs an unsigned char value */
+    while (off > 0 && isspace((unsigned char)rt->client_id[off - 1]))
+        off--;
+    rt->client_id[off] = '\0';
+
+    /* http context is now initialized */
+    rt->initialized = 1;
+    return 0;
+
+fail:
+    rtmp_http_close(h);
+    return ret;
+}
+
+URLProtocol ff_rtmphttp_protocol = {
+    .name           = "rtmphttp",       /* URL scheme handled by this protocol */
+    .url_open       = rtmp_http_open,
+    .url_read       = rtmp_http_read,   /* also issues the "send"/"idle" POSTs */
+    .url_write      = rtmp_http_write,  /* only buffers; nothing is sent here */
+    .url_close      = rtmp_http_close,
+    .priv_data_size = sizeof(RTMP_HTTPContext),
+    .flags          = URL_PROTOCOL_FLAG_NETWORK,
+};
diff --git a/libavformat/rtmpproto.c b/libavformat/rtmpproto.c
index b3ae5a21e6..b3e2a30f48 100644
--- a/libavformat/rtmpproto.c
+++ b/libavformat/rtmpproto.c
@@ -1112,9 +1112,15 @@ static int rtmp_open(URLContext *s, const char *uri, int flags)
     av_url_split(proto, sizeof(proto), NULL, 0, hostname, sizeof(hostname), &port,
                  path, sizeof(path), s->filename);
 
-    if (port < 0)
-        port = RTMP_DEFAULT_PORT;
-    ff_url_join(buf, sizeof(buf), "tcp", NULL, hostname, port, NULL);
+    if (!strcmp(proto, "rtmpt")) {
+        /* open the http tunneling connection */
+        ff_url_join(buf, sizeof(buf), "rtmphttp", NULL, hostname, port, NULL);
+    } else {
+        /* open the tcp connection */
+        if (port < 0)
+            port = RTMP_DEFAULT_PORT;
+        ff_url_join(buf, sizeof(buf), "tcp", NULL, hostname, port, NULL);
+    }
 
     if ((ret = ffurl_open(&rt->stream, buf, AVIO_FLAG_READ_WRITE,
                           &s->interrupt_callback, NULL)) < 0) {
@@ -1425,3 +1431,21 @@ URLProtocol ff_rtmp_protocol = {
     .flags          = URL_PROTOCOL_FLAG_NETWORK,
     .priv_data_class= &rtmp_class,
 };
+
+static const AVClass rtmpt_class = {
+    .class_name = "rtmpt",
+    .item_name  = av_default_item_name,
+    .option     = rtmp_options,          /* option table declared outside this hunk */
+    .version    = LIBAVUTIL_VERSION_INT,
+};
+
+URLProtocol ff_rtmpt_protocol = {
+    .name            = "rtmpt",
+    .url_open        = rtmp_open,        /* rtmp_open() picks rtmphttp:// as transport for "rtmpt" */
+    .url_read        = rtmp_read,
+    .url_write       = rtmp_write,
+    .url_close       = rtmp_close,
+    .priv_data_size  = sizeof(RTMPContext),
+    .flags           = URL_PROTOCOL_FLAG_NETWORK,
+    .priv_data_class = &rtmpt_class,
+};
diff --git a/libavformat/version.h b/libavformat/version.h
index b00701eefa..aae0eb1d70 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -30,8 +30,8 @@
 #include "libavutil/avutil.h"
 
 #define LIBAVFORMAT_VERSION_MAJOR 54
-#define LIBAVFORMAT_VERSION_MINOR  3
-#define LIBAVFORMAT_VERSION_MICRO  1
+#define LIBAVFORMAT_VERSION_MINOR  4
+#define LIBAVFORMAT_VERSION_MICRO  0
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
                                                LIBAVFORMAT_VERSION_MINOR, \

From d9669eab0b8709f66d0872671511cb9487ea2651 Mon Sep 17 00:00:00 2001
From: "Ronald S. Bultje" <rsbultje@gmail.com>
Date: Thu, 14 Jun 2012 11:47:55 +0100
Subject: [PATCH 07/14] dwt: remove variable-length arrays

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/dwt.c             | 109 ++++++++++++++++++-----------------
 libavcodec/dwt.h             |  18 +++---
 libavcodec/snow.c            |   4 ++
 libavcodec/snow.h            |   2 +
 libavcodec/snowdec.c         |   2 +-
 libavcodec/snowenc.c         |  12 ++--
 libavcodec/x86/snowdsp_mmx.c |   6 +-
 7 files changed, 79 insertions(+), 74 deletions(-)

diff --git a/libavcodec/dwt.c b/libavcodec/dwt.c
index d3d4f3b545..56e4a572ea 100644
--- a/libavcodec/dwt.c
+++ b/libavcodec/dwt.c
@@ -243,9 +243,8 @@ static av_always_inline void inv_liftS(IDWTELEM *dst, IDWTELEM *src,
 }
 #endif /* ! liftS */
 
-static void horizontal_decompose53i(DWTELEM *b, int width)
+static void horizontal_decompose53i(DWTELEM *b, DWTELEM *temp, int width)
 {
-    DWTELEM temp[width];
     const int width2 = width >> 1;
     int x;
     const int w2 = (width + 1) >> 1;
@@ -311,8 +310,8 @@ static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
         b1[i] += (b0[i] + b2[i] + 2) >> 2;
 }
 
-static void spatial_decompose53i(DWTELEM *buffer, int width, int height,
-                                 int stride)
+static void spatial_decompose53i(DWTELEM *buffer, DWTELEM *temp,
+                                 int width, int height, int stride)
 {
     int y;
     DWTELEM *b0 = buffer + mirror(-2 - 1, height - 1) * stride;
@@ -323,9 +322,9 @@ static void spatial_decompose53i(DWTELEM *buffer, int width, int height,
         DWTELEM *b3 = buffer + mirror(y + 2, height - 1) * stride;
 
         if (y + 1 < (unsigned)height)
-            horizontal_decompose53i(b2, width);
+            horizontal_decompose53i(b2, temp, width);
         if (y + 2 < (unsigned)height)
-            horizontal_decompose53i(b3, width);
+            horizontal_decompose53i(b3, temp, width);
 
         if (y + 1 < (unsigned)height)
             vertical_decompose53iH0(b1, b2, b3, width);
@@ -337,9 +336,8 @@ static void spatial_decompose53i(DWTELEM *buffer, int width, int height,
     }
 }
 
-static void horizontal_decompose97i(DWTELEM *b, int width)
+static void horizontal_decompose97i(DWTELEM *b, DWTELEM *temp, int width)
 {
-    DWTELEM temp[width];
     const int w2 = (width + 1) >> 1;
 
     lift(temp + w2, b + 1, b,         1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
@@ -389,8 +387,8 @@ static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
         b1[i] += (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS;
 }
 
-static void spatial_decompose97i(DWTELEM *buffer, int width, int height,
-                                 int stride)
+static void spatial_decompose97i(DWTELEM *buffer, DWTELEM *temp,
+                                 int width, int height, int stride)
 {
     int y;
     DWTELEM *b0 = buffer + mirror(-4 - 1, height - 1) * stride;
@@ -403,9 +401,9 @@ static void spatial_decompose97i(DWTELEM *buffer, int width, int height,
         DWTELEM *b5 = buffer + mirror(y + 4, height - 1) * stride;
 
         if (y + 3 < (unsigned)height)
-            horizontal_decompose97i(b4, width);
+            horizontal_decompose97i(b4, temp, width);
         if (y + 4 < (unsigned)height)
-            horizontal_decompose97i(b5, width);
+            horizontal_decompose97i(b5, temp, width);
 
         if (y + 3 < (unsigned)height)
             vertical_decompose97iH0(b3, b4, b5, width);
@@ -423,20 +421,20 @@ static void spatial_decompose97i(DWTELEM *buffer, int width, int height,
     }
 }
 
-void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride,
-                    int type, int decomposition_count)
+void ff_spatial_dwt(DWTELEM *buffer, DWTELEM *temp, int width, int height,
+                    int stride, int type, int decomposition_count)
 {
     int level;
 
     for (level = 0; level < decomposition_count; level++) {
         switch (type) {
         case DWT_97:
-            spatial_decompose97i(buffer,
+            spatial_decompose97i(buffer, temp,
                                  width >> level, height >> level,
                                  stride << level);
             break;
         case DWT_53:
-            spatial_decompose53i(buffer,
+            spatial_decompose53i(buffer, temp,
                                  width >> level, height >> level,
                                  stride << level);
             break;
@@ -444,9 +442,8 @@ void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride,
     }
 }
 
-static void horizontal_compose53i(IDWTELEM *b, int width)
+static void horizontal_compose53i(IDWTELEM *b, IDWTELEM *temp, int width)
 {
-    IDWTELEM temp[width];
     const int width2 = width >> 1;
     const int w2     = (width + 1) >> 1;
     int x;
@@ -506,6 +503,7 @@ static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer,
 }
 
 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb,
+                                           IDWTELEM *temp,
                                            int width, int height,
                                            int stride_line)
 {
@@ -535,17 +533,18 @@ static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb,
     }
 
     if (y - 1 < (unsigned)height)
-        horizontal_compose53i(b0, width);
+        horizontal_compose53i(b0, temp, width);
     if (y + 0 < (unsigned)height)
-        horizontal_compose53i(b1, width);
+        horizontal_compose53i(b1, temp, width);
 
     cs->b0  = b2;
     cs->b1  = b3;
     cs->y  += 2;
 }
 
-static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width,
-                                  int height, int stride)
+static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer,
+                                  IDWTELEM *temp, int width, int height,
+                                  int stride)
 {
     int y        = cs->y;
     IDWTELEM *b0 = cs->b0;
@@ -559,27 +558,26 @@ static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width,
         vertical_compose53iH0(b0, b1, b2, width);
 
     if (y - 1 < (unsigned)height)
-        horizontal_compose53i(b0, width);
+        horizontal_compose53i(b0, temp, width);
     if (y + 0 < (unsigned)height)
-        horizontal_compose53i(b1, width);
+        horizontal_compose53i(b1, temp, width);
 
     cs->b0  = b2;
     cs->b1  = b3;
     cs->y  += 2;
 }
 
-static void av_unused spatial_compose53i(IDWTELEM *buffer, int width,
-                                         int height, int stride)
+static void av_unused spatial_compose53i(IDWTELEM *buffer, IDWTELEM *temp,
+                                         int width, int height, int stride)
 {
     DWTCompose cs;
     spatial_compose53i_init(&cs, buffer, height, stride);
     while (cs.y <= height)
-        spatial_compose53i_dy(&cs, buffer, width, height, stride);
+        spatial_compose53i_dy(&cs, buffer, temp, width, height, stride);
 }
 
-void ff_snow_horizontal_compose97i(IDWTELEM *b, int width)
+void ff_snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width)
 {
-    IDWTELEM temp[width];
     const int w2 = (width + 1) >> 1;
 
 #if 0 //maybe more understadable but slower
@@ -693,8 +691,9 @@ static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height
 }
 
 static void spatial_compose97i_dy_buffered(DWTContext *dsp, DWTCompose *cs,
-                                           slice_buffer *sb, int width,
-                                           int height, int stride_line)
+                                           slice_buffer * sb, IDWTELEM *temp,
+                                           int width, int height,
+                                           int stride_line)
 {
     int y = cs->y;
 
@@ -723,9 +722,9 @@ static void spatial_compose97i_dy_buffered(DWTContext *dsp, DWTCompose *cs,
     }
 
     if (y - 1 < (unsigned)height)
-        dsp->horizontal_compose97i(b0, width);
+        dsp->horizontal_compose97i(b0, temp, width);
     if (y + 0 < (unsigned)height)
-        dsp->horizontal_compose97i(b1, width);
+        dsp->horizontal_compose97i(b1, temp, width);
 
     cs->b0  = b2;
     cs->b1  = b3;
@@ -734,8 +733,9 @@ static void spatial_compose97i_dy_buffered(DWTContext *dsp, DWTCompose *cs,
     cs->y  += 2;
 }
 
-static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width,
-                                  int height, int stride)
+static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer,
+                                  IDWTELEM *temp, int width, int height,
+                                  int stride)
 {
     int y        = cs->y;
     IDWTELEM *b0 = cs->b0;
@@ -755,9 +755,9 @@ static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width,
         vertical_compose97iH0(b0, b1, b2, width);
 
     if (y - 1 < (unsigned)height)
-        ff_snow_horizontal_compose97i(b0, width);
+        ff_snow_horizontal_compose97i(b0, temp, width);
     if (y + 0 < (unsigned)height)
-        ff_snow_horizontal_compose97i(b1, width);
+        ff_snow_horizontal_compose97i(b1, temp, width);
 
     cs->b0  = b2;
     cs->b1  = b3;
@@ -766,13 +766,13 @@ static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width,
     cs->y  += 2;
 }
 
-static void av_unused spatial_compose97i(IDWTELEM *buffer, int width,
-                                         int height, int stride)
+static void av_unused spatial_compose97i(IDWTELEM *buffer, IDWTELEM *temp,
+                                         int width, int height, int stride)
 {
     DWTCompose cs;
     spatial_compose97i_init(&cs, buffer, height, stride);
     while (cs.y <= height)
-        spatial_compose97i_dy(&cs, buffer, width, height, stride);
+        spatial_compose97i_dy(&cs, buffer, temp, width, height, stride);
 }
 
 void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width,
@@ -795,9 +795,9 @@ void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width,
 }
 
 void ff_spatial_idwt_buffered_slice(DWTContext *dsp, DWTCompose *cs,
-                                    slice_buffer *slice_buf, int width,
-                                    int height, int stride_line, int type,
-                                    int decomposition_count, int y)
+                                    slice_buffer *slice_buf, IDWTELEM *temp,
+                                    int width, int height, int stride_line,
+                                    int type, int decomposition_count, int y)
 {
     const int support = type == 1 ? 3 : 5;
     int level;
@@ -808,13 +808,13 @@ void ff_spatial_idwt_buffered_slice(DWTContext *dsp, DWTCompose *cs,
         while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) {
             switch (type) {
             case DWT_97:
-                spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf,
+                spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf, temp,
                                                width >> level,
                                                height >> level,
                                                stride_line << level);
                 break;
             case DWT_53:
-                spatial_compose53i_dy_buffered(cs + level, slice_buf,
+                spatial_compose53i_dy_buffered(cs + level, slice_buf, temp,
                                                width >> level,
                                                height >> level,
                                                stride_line << level);
@@ -842,8 +842,9 @@ static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width,
     }
 }
 
-static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width,
-                                  int height, int stride, int type,
+static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer,
+                                  IDWTELEM *temp, int width, int height,
+                                  int stride, int type,
                                   int decomposition_count, int y)
 {
     const int support = type == 1 ? 3 : 5;
@@ -855,26 +856,26 @@ static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width,
         while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) {
             switch (type) {
             case DWT_97:
-                spatial_compose97i_dy(cs + level, buffer, width >> level,
+                spatial_compose97i_dy(cs + level, buffer, temp, width >> level,
                                       height >> level, stride << level);
                 break;
             case DWT_53:
-                spatial_compose53i_dy(cs + level, buffer, width >> level,
+                spatial_compose53i_dy(cs + level, buffer, temp, width >> level,
                                       height >> level, stride << level);
                 break;
             }
         }
 }
 
-void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride,
-                     int type, int decomposition_count)
+void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height,
+                     int stride, int type, int decomposition_count)
 {
     DWTCompose cs[MAX_DECOMPOSITIONS];
     int y;
     ff_spatial_idwt_init(cs, buffer, width, height, stride, type,
                          decomposition_count);
     for (y = 0; y < height; y += 4)
-        ff_spatial_idwt_slice(cs, buffer, width, height, stride, type,
+        ff_spatial_idwt_slice(cs, buffer, temp, width, height, stride, type,
                               decomposition_count, y);
 }
 
@@ -883,7 +884,7 @@ static inline int w_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size,
 {
     int s, i, j;
     const int dec_count = w == 8 ? 3 : 4;
-    int tmp[32 * 32];
+    int tmp[32 * 32], tmp2[32];
     int level, ori;
     static const int scale[2][2][4][4] = {
         {
@@ -925,7 +926,7 @@ static inline int w_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size,
         pix2 += line_size;
     }
 
-    ff_spatial_dwt(tmp, w, h, 32, type, dec_count);
+    ff_spatial_dwt(tmp, tmp2, w, h, 32, type, dec_count);
 
     s = 0;
     assert(w == h);
diff --git a/libavcodec/dwt.h b/libavcodec/dwt.h
index 9229928f33..771a9bf53e 100644
--- a/libavcodec/dwt.h
+++ b/libavcodec/dwt.h
@@ -50,7 +50,7 @@ typedef struct DWTContext {
     void (*vertical_compose97i)(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
                                 IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5,
                                 int width);
-    void (*horizontal_compose97i)(IDWTELEM *b, int width);
+    void (*horizontal_compose97i)(IDWTELEM *b, IDWTELEM *temp, int width);
     void (*inner_add_yblock)(const uint8_t *obmc, const int obmc_stride,
                              uint8_t **block, int b_w, int b_h, int src_x,
                              int src_y, int src_stride, slice_buffer *sb,
@@ -148,7 +148,7 @@ IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, int line);
 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
                                  IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5,
                                  int width);
-void ff_snow_horizontal_compose97i(IDWTELEM *b, int width);
+void ff_snow_horizontal_compose97i(IDWTELEM *b, IDWTELEM *temp, int width);
 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride,
                               uint8_t **block, int b_w, int b_h, int src_x,
                               int src_y, int src_stride, slice_buffer *sb,
@@ -157,18 +157,18 @@ void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride,
 int ff_w53_32_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 int ff_w97_32_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 
-void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type,
-                    int decomposition_count);
+void ff_spatial_dwt(int *buffer, int *temp, int width, int height, int stride,
+                    int type, int decomposition_count);
 
 void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width,
                                    int height, int stride_line, int type,
                                    int decomposition_count);
 void ff_spatial_idwt_buffered_slice(DWTContext *dsp, DWTCompose *cs,
-                                    slice_buffer *slice_buf, int width,
-                                    int height, int stride_line, int type,
-                                    int decomposition_count, int y);
-void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride,
-                     int type, int decomposition_count);
+                                    slice_buffer *slice_buf, IDWTELEM *temp,
+                                    int width, int height, int stride_line,
+                                    int type, int decomposition_count, int y);
+void ff_spatial_idwt(IDWTELEM *buffer, IDWTELEM *temp, int width, int height,
+                     int stride, int type, int decomposition_count);
 
 void ff_dwt_init(DWTContext *c);
 void ff_dwt_init_x86(DWTContext *c);
diff --git a/libavcodec/snow.c b/libavcodec/snow.c
index 384cda82ac..edd7d075e4 100644
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -440,6 +440,8 @@ av_cold int ff_snow_common_init(AVCodecContext *avctx){
 
     s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
     s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
+    s->temp_dwt_buffer = av_mallocz(width * sizeof(DWTELEM));
+    s->temp_idwt_buffer = av_mallocz(width * sizeof(IDWTELEM));
 
     for(i=0; i<MAX_REF_FRAMES; i++)
         for(j=0; j<MAX_REF_FRAMES; j++)
@@ -618,7 +620,9 @@ av_cold void ff_snow_common_end(SnowContext *s)
     int plane_index, level, orientation, i;
 
     av_freep(&s->spatial_dwt_buffer);
+    av_freep(&s->temp_dwt_buffer);
     av_freep(&s->spatial_idwt_buffer);
+    av_freep(&s->temp_idwt_buffer);
 
     s->m.me.temp= NULL;
     av_freep(&s->m.me.scratchpad);
diff --git a/libavcodec/snow.h b/libavcodec/snow.h
index 5edb8f8af6..3ceb6af99d 100644
--- a/libavcodec/snow.h
+++ b/libavcodec/snow.h
@@ -132,7 +132,9 @@ typedef struct SnowContext{
     int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
     uint32_t *ref_scores[MAX_REF_FRAMES];
     DWTELEM *spatial_dwt_buffer;
+    DWTELEM *temp_dwt_buffer;
     IDWTELEM *spatial_idwt_buffer;
+    IDWTELEM *temp_idwt_buffer;
     int colorspace_type;
     int chroma_h_shift;
     int chroma_v_shift;
diff --git a/libavcodec/snowdec.c b/libavcodec/snowdec.c
index 62ef5f11f4..5dec277eb0 100644
--- a/libavcodec/snowdec.c
+++ b/libavcodec/snowdec.c
@@ -502,7 +502,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
             }
 
             for(; yd<slice_h; yd+=4){
-                ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
+                ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, s->temp_idwt_buffer, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
             }
 
             if(s->qlog == LOSSLESS_QLOG){
diff --git a/libavcodec/snowenc.c b/libavcodec/snowenc.c
index 7b010e1f1c..7e3b68fb8a 100644
--- a/libavcodec/snowenc.c
+++ b/libavcodec/snowenc.c
@@ -93,7 +93,7 @@ static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, i
     //FIXME pass the copy cleanly ?
 
 //    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
-    ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
+    ff_spatial_dwt(buffer, s->temp_dwt_buffer, width, height, stride, type, s->spatial_decomposition_count);
 
     for(level=0; level<s->spatial_decomposition_count; level++){
         for(orientation=level ? 1 : 0; orientation<4; orientation++){
@@ -118,7 +118,7 @@ static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, i
                     for(xs= 0; xs<Q2_STEP; xs++){
                         memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
                         dequantize_all(s, p, idwt2_buffer, width, height);
-                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
+                        ff_spatial_idwt(idwt2_buffer, s->temp_idwt_buffer, width, height, stride, type, s->spatial_decomposition_count);
                         find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
                         memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
                         for(y=ys; y<b->height; y+= Q2_STEP){
@@ -129,7 +129,7 @@ static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, i
                             }
                         }
                         dequantize_all(s, p, idwt2_buffer, width, height);
-                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
+                        ff_spatial_idwt(idwt2_buffer, s->temp_idwt_buffer, width, height, stride, type, s->spatial_decomposition_count);
                         find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
                         for(y=ys; y<b->height; y+= Q2_STEP){
                             for(x=xs; x<b->width; x+= Q2_STEP){
@@ -1586,7 +1586,7 @@ static void calculate_visual_weight(SnowContext *s, Plane *p){
 
             memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
             ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
-            ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
+            ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
             for(y=0; y<height; y++){
                 for(x=0; x<width; x++){
                     int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
@@ -1775,7 +1775,7 @@ redo_frame:
             /*  if(QUANTIZE2)
                 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
             else*/
-                ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
+                ff_spatial_dwt(s->spatial_dwt_buffer, s->temp_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
 
             if(s->pass1_rc && plane_index==0){
                 int delta_qlog = ratecontrol_1pass(s, pic);
@@ -1814,7 +1814,7 @@ redo_frame:
                 }
             }
 
-            ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
+            ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
             if(s->qlog == LOSSLESS_QLOG){
                 for(y=0; y<h; y++){
                     for(x=0; x<w; x++){
diff --git a/libavcodec/x86/snowdsp_mmx.c b/libavcodec/x86/snowdsp_mmx.c
index 00b62616cb..729a13a4ce 100644
--- a/libavcodec/x86/snowdsp_mmx.c
+++ b/libavcodec/x86/snowdsp_mmx.c
@@ -26,9 +26,8 @@
 #include "libavcodec/dwt.h"
 #include "dsputil_mmx.h"
 
-static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
+static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, IDWTELEM *temp, int width){
     const int w2= (width+1)>>1;
-    DECLARE_ALIGNED(16, IDWTELEM, temp)[width>>1];
     const int w_l= (width>>1);
     const int w_r= w2 - 1;
     int i;
@@ -215,9 +214,8 @@ static void ff_snow_horizontal_compose97i_sse2(IDWTELEM *b, int width){
     }
 }
 
-static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, int width){
+static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int width){
     const int w2= (width+1)>>1;
-    IDWTELEM temp[width >> 1];
     const int w_l= (width>>1);
     const int w_r= w2 - 1;
     int i;

From becc3c629b6d2be3171a26587f7ca999ef7a9d86 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Mon, 18 Jun 2012 00:05:52 +0300
Subject: [PATCH 08/14] configure: Sort the library listings in the help text
 alphabetically
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only these three libraries were out of order, the rest was already
neatly sorted.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 configure | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/configure b/configure
index d614366b2d..a5d27870a2 100755
--- a/configure
+++ b/configure
@@ -166,9 +166,6 @@ External library support:
   --enable-bzlib           enable bzlib [autodetect]
   --enable-frei0r          enable frei0r video filtering
   --enable-gnutls          enable gnutls [no]
-  --enable-libopencore-amrnb enable AMR-NB de/encoding via libopencore-amrnb [no]
-  --enable-libopencore-amrwb enable AMR-WB decoding via libopencore-amrwb [no]
-  --enable-libopencv       enable video filtering via libopencv [no]
   --enable-libcdio         enable audio CD grabbing with libcdio
   --enable-libdc1394       enable IIDC-1394 grabbing using libdc1394
                            and libraw1394 [no]
@@ -176,6 +173,9 @@ External library support:
   --enable-libfreetype     enable libfreetype [no]
   --enable-libgsm          enable GSM support via libgsm [no]
   --enable-libmp3lame      enable MP3 encoding via libmp3lame [no]
+  --enable-libopencore-amrnb enable AMR-NB de/encoding via libopencore-amrnb [no]
+  --enable-libopencore-amrwb enable AMR-WB decoding via libopencore-amrwb [no]
+  --enable-libopencv       enable video filtering via libopencv [no]
   --enable-libopenjpeg     enable JPEG 2000 decoding via OpenJPEG [no]
   --enable-libpulse        enable Pulseaudio input via libpulse [no]
   --enable-librtmp         enable RTMP[E] support via librtmp [no]

From d77f4afa9814b0433be6fdbfd7d8a113592ba680 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Storsj=C3=B6?= <martin@martin.st>
Date: Mon, 28 May 2012 12:11:26 +0300
Subject: [PATCH 09/14] rtpenc: Allow requesting H264 RTP packetization mode 0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This requires all NAL units to fit within single RTP packets. It
doesn't change the actual packetization for packets that fit, but
errors out and gives a helpful hint if the NAL units would have to
be split, and signals the right packetization mode in the SDP.

Signed-off-by: Martin Storsjö <martin@martin.st>
---
 libavformat/rtpenc.h      |  2 ++
 libavformat/rtpenc_h264.c |  6 ++++++
 libavformat/sdp.c         | 11 ++++++++---
 libavformat/version.h     |  2 +-
 4 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/libavformat/rtpenc.h b/libavformat/rtpenc.h
index b8a3fd1ad4..f81cc7a6b8 100644
--- a/libavformat/rtpenc.h
+++ b/libavformat/rtpenc.h
@@ -66,12 +66,14 @@ typedef struct RTPMuxContext RTPMuxContext;
 #define FF_RTP_FLAG_MP4A_LATM 1
 #define FF_RTP_FLAG_RFC2190   2
 #define FF_RTP_FLAG_SKIP_RTCP 4
+#define FF_RTP_FLAG_H264_MODE0 8
 
 #define FF_RTP_FLAG_OPTS(ctx, fieldname) \
     { "rtpflags", "RTP muxer flags", offsetof(ctx, fieldname), AV_OPT_TYPE_FLAGS, {.dbl = 0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "rtpflags" }, \
     { "latm", "Use MP4A-LATM packetization instead of MPEG4-GENERIC for AAC", 0, AV_OPT_TYPE_CONST, {.dbl = FF_RTP_FLAG_MP4A_LATM}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "rtpflags" }, \
     { "rfc2190", "Use RFC 2190 packetization instead of RFC 4629 for H.263", 0, AV_OPT_TYPE_CONST, {.dbl = FF_RTP_FLAG_RFC2190}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "rtpflags" }, \
     { "skip_rtcp", "Don't send RTCP sender reports", 0, AV_OPT_TYPE_CONST, {.dbl = FF_RTP_FLAG_SKIP_RTCP}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "rtpflags" }, \
+    { "h264_mode0", "Use mode 0 for H264 in RTP", 0, AV_OPT_TYPE_CONST, {.dbl = FF_RTP_FLAG_H264_MODE0}, INT_MIN, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM, "rtpflags" }, \
 
 void ff_rtp_send_data(AVFormatContext *s1, const uint8_t *buf1, int len, int m);
 
diff --git a/libavformat/rtpenc_h264.c b/libavformat/rtpenc_h264.c
index 776da83a62..ac74074307 100644
--- a/libavformat/rtpenc_h264.c
+++ b/libavformat/rtpenc_h264.c
@@ -55,6 +55,12 @@ static void nal_send(AVFormatContext *s1, const uint8_t *buf, int size, int last
         uint8_t type = buf[0] & 0x1F;
         uint8_t nri = buf[0] & 0x60;
 
+        if (s->flags & FF_RTP_FLAG_H264_MODE0) {
+            av_log(s1, AV_LOG_ERROR,
+                   "NAL size %d > %d, try -slice-max-size %d\n", size,
+                   s->max_payload_size, s->max_payload_size);
+            return;
+        }
         av_log(s1, AV_LOG_DEBUG, "NAL size %d > %d\n", size, s->max_payload_size);
         s->buf[0] = 28;        /* FU Indicator; Type = 28 ---> FU-A */
         s->buf[0] |= nri;
diff --git a/libavformat/sdp.c b/libavformat/sdp.c
index 9692aabbc0..7df8b13b67 100644
--- a/libavformat/sdp.c
+++ b/libavformat/sdp.c
@@ -388,15 +388,20 @@ static char *sdp_write_media_attributes(char *buff, int size, AVCodecContext *c,
     char *config = NULL;
 
     switch (c->codec_id) {
-        case CODEC_ID_H264:
+        case CODEC_ID_H264: {
+            int mode = 1; /* signalled as packetization-mode */
+            if (fmt && fmt->oformat && fmt->oformat->priv_class &&
+                av_opt_flag_is_set(fmt->priv_data, "rtpflags", "h264_mode0"))
+                mode = 0;
             if (c->extradata_size) {
                 config = extradata2psets(c);
             }
             av_strlcatf(buff, size, "a=rtpmap:%d H264/90000\r\n"
-                                    "a=fmtp:%d packetization-mode=1%s\r\n",
+                                    "a=fmtp:%d packetization-mode=%d%s\r\n",
                                      payload_type,
-                                     payload_type, config ? config : "");
+                                     payload_type, mode, config ? config : "");
             break;
+        }
         case CODEC_ID_H263:
         case CODEC_ID_H263P:
             /* a=framesize is required by 3GPP TS 26.234 (PSS). It
diff --git a/libavformat/version.h b/libavformat/version.h
index aae0eb1d70..e1703319b6 100644
--- a/libavformat/version.h
+++ b/libavformat/version.h
@@ -31,7 +31,7 @@
 
 #define LIBAVFORMAT_VERSION_MAJOR 54
 #define LIBAVFORMAT_VERSION_MINOR  4
-#define LIBAVFORMAT_VERSION_MICRO  0
+#define LIBAVFORMAT_VERSION_MICRO  1
 
 #define LIBAVFORMAT_VERSION_INT AV_VERSION_INT(LIBAVFORMAT_VERSION_MAJOR, \
                                                LIBAVFORMAT_VERSION_MINOR, \

From 7457ebee3ad08e626ed8e7aafb9117754162f17b Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 16 Jun 2012 16:14:21 +0100
Subject: [PATCH 10/14] fate: vorbis: add 5.1 surround test

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 tests/fate/vorbis.mak | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/fate/vorbis.mak b/tests/fate/vorbis.mak
index 802c529e4d..774cb350d1 100644
--- a/tests/fate/vorbis.mak
+++ b/tests/fate/vorbis.mak
@@ -76,6 +76,10 @@ FATE_VORBIS += fate-vorbis-19
 fate-vorbis-19: CMD = pcm -i $(SAMPLES)/vorbis/test-short2_small.ogg
 fate-vorbis-19: REF = $(SAMPLES)/vorbis/test-short2_small.pcm
 
+FATE_VORBIS += fate-vorbis-20
+fate-vorbis-20: CMD = pcm -i $(SAMPLES)/vorbis/6.ogg
+fate-vorbis-20: REF = $(SAMPLES)/vorbis/6.pcm
+
 FATE_SAMPLES_AVCONV += $(FATE_VORBIS)
 fate-vorbis: $(FATE_VORBIS)
 $(FATE_VORBIS): CMP = oneoff

From 9fcda25e35bc012ee9a434faf0fb00bece85be6d Mon Sep 17 00:00:00 2001
From: Mans Rullgard <mans@mansr.com>
Date: Sat, 16 Jun 2012 18:08:03 +0100
Subject: [PATCH 11/14] vorbisdec: replace div/mod in loop with a counter

2x speedup of surround decoding on Cortex-A9.

Signed-off-by: Mans Rullgard <mans@mansr.com>
---
 libavcodec/vorbisdec.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/libavcodec/vorbisdec.c b/libavcodec/vorbisdec.c
index 08198d732a..deaaaa21c2 100644
--- a/libavcodec/vorbisdec.c
+++ b/libavcodec/vorbisdec.c
@@ -1409,17 +1409,24 @@ static av_always_inline int vorbis_residue_decode_internal(vorbis_context *vc,
                                 }
 
                             } else if (vr_type == 2) {
-                                voffs = voffset;
+                                unsigned voffs_div = FASTDIV(voffset, ch);
+                                unsigned voffs_mod = voffset - voffs_div * ch;
 
                                 for (k = 0; k < step; ++k) {
                                     coffs = get_vlc2(gb, codebook.vlc.table, codebook.nb_bits, 3) * dim;
-                                    for (l = 0; l < dim; ++l, ++voffs) {
-                                        vec[voffs / ch + (voffs % ch) * vlen] += codebook.codevectors[coffs + l];  // FPMATH FIXME use if and counter instead of / and %
+                                    for (l = 0; l < dim; ++l) {
+                                        vec[voffs_div + voffs_mod * vlen] +=
+                                            codebook.codevectors[coffs + l];
 
                                         av_dlog(NULL, " pass %d offs: %d curr: %f change: %f cv offs.: %d+%d  \n",
-                                                pass, voffset / ch + (voffs % ch) * vlen,
-                                                vec[voffset / ch + (voffs % ch) * vlen],
+                                                pass, voffs_div + voffs_mod * vlen,
+                                                vec[voffs_div + voffs_mod * vlen],
                                                 codebook.codevectors[coffs + l], coffs, l);
+
+                                        if (++voffs_mod == ch) {
+                                            voffs_div++;
+                                            voffs_mod = 0;
+                                        }
                                     }
                                 }
                             }

From 3971be0eb5d382b85f1d8772c51f21cccdc68eab Mon Sep 17 00:00:00 2001
From: John Stebbins <jstebbins.hb@gmail.com>
Date: Sat, 9 Jun 2012 13:45:49 -0700
Subject: [PATCH 12/14] Add Dolby/DPLII downmix support to libavresample

Signed-off-by: Justin Ruggles <justin.ruggles@gmail.com>
---
 doc/APIchanges                   |  3 ++
 libavresample/audio_mix.c        |  3 +-
 libavresample/audio_mix_matrix.c | 60 +++++++++++++++++++++++++++-----
 libavresample/avresample.h       |  3 +-
 libavresample/internal.h         |  1 +
 libavresample/options.c          |  4 +++
 libavresample/version.h          |  2 +-
 libavutil/audioconvert.h         |  7 ++++
 8 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/doc/APIchanges b/doc/APIchanges
index 50cc787bad..4d14882f48 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -13,6 +13,9 @@ libavutil:     2011-04-18
 
 API changes, most recent first:
 
+2012-xx-xx - xxxxxxx - lavr 0.0.3
+  Add a parameter to avresample_build_matrix() for Dolby/DPLII downmixing.
+
 2012-xx-xx - xxxxxxx - lavfi 2.23.0 - avfilter.h
   Add AVFilterContext.nb_inputs/outputs. Deprecate
   AVFilterContext.input/output_count.
diff --git a/libavresample/audio_mix.c b/libavresample/audio_mix.c
index 7ab11b0d4d..93192221cd 100644
--- a/libavresample/audio_mix.c
+++ b/libavresample/audio_mix.c
@@ -320,7 +320,8 @@ int ff_audio_mix_init(AVAudioResampleContext *avr)
                                       avr->center_mix_level,
                                       avr->surround_mix_level,
                                       avr->lfe_mix_level, 1, matrix_dbl,
-                                      avr->in_channels);
+                                      avr->in_channels,
+                                      avr->matrix_encoding);
         if (ret < 0) {
             av_free(matrix_dbl);
             return ret;
diff --git a/libavresample/audio_mix_matrix.c b/libavresample/audio_mix_matrix.c
index 6135b02422..f7121c846d 100644
--- a/libavresample/audio_mix_matrix.c
+++ b/libavresample/audio_mix_matrix.c
@@ -54,6 +54,8 @@
 #define SURROUND_DIRECT_LEFT   33
 #define SURROUND_DIRECT_RIGHT  34
 
+#define SQRT3_2      1.22474487139158904909  /* sqrt(3/2) */
+
 static av_always_inline int even(uint64_t layout)
 {
     return (!layout || (layout & (layout - 1)));
@@ -83,14 +85,21 @@ static int sane_layout(uint64_t layout)
 int avresample_build_matrix(uint64_t in_layout, uint64_t out_layout,
                             double center_mix_level, double surround_mix_level,
                             double lfe_mix_level, int normalize,
-                            double *matrix_out, int stride)
+                            double *matrix_out, int stride,
+                            enum AVMatrixEncoding matrix_encoding)
 {
     int i, j, out_i, out_j;
     double matrix[64][64] = {{0}};
-    int64_t unaccounted = in_layout & ~out_layout;
+    int64_t unaccounted;
     double maxcoef = 0;
     int in_channels, out_channels;
 
+    if ((out_layout & AV_CH_LAYOUT_STEREO_DOWNMIX) == AV_CH_LAYOUT_STEREO_DOWNMIX) {
+        out_layout = AV_CH_LAYOUT_STEREO;
+    }
+
+    unaccounted = in_layout & ~out_layout;
+
     in_channels  = av_get_channel_layout_nb_channels( in_layout);
     out_channels = av_get_channel_layout_nb_channels(out_layout);
 
@@ -140,8 +149,19 @@ int avresample_build_matrix(uint64_t in_layout, uint64_t out_layout,
             matrix[SIDE_LEFT ][BACK_CENTER] += M_SQRT1_2;
             matrix[SIDE_RIGHT][BACK_CENTER] += M_SQRT1_2;
         } else if (out_layout & AV_CH_FRONT_LEFT) {
-            matrix[FRONT_LEFT ][BACK_CENTER] += surround_mix_level * M_SQRT1_2;
-            matrix[FRONT_RIGHT][BACK_CENTER] += surround_mix_level * M_SQRT1_2;
+            if (matrix_encoding == AV_MATRIX_ENCODING_DOLBY ||
+                matrix_encoding == AV_MATRIX_ENCODING_DPLII) {
+                if (unaccounted & (AV_CH_BACK_LEFT | AV_CH_SIDE_LEFT)) {
+                    matrix[FRONT_LEFT ][BACK_CENTER] -= surround_mix_level * M_SQRT1_2;
+                    matrix[FRONT_RIGHT][BACK_CENTER] += surround_mix_level * M_SQRT1_2;
+                } else {
+                    matrix[FRONT_LEFT ][BACK_CENTER] -= surround_mix_level;
+                    matrix[FRONT_RIGHT][BACK_CENTER] += surround_mix_level;
+                }
+            } else {
+                matrix[FRONT_LEFT ][BACK_CENTER] += surround_mix_level * M_SQRT1_2;
+                matrix[FRONT_RIGHT][BACK_CENTER] += surround_mix_level * M_SQRT1_2;
+            }
         } else if (out_layout & AV_CH_FRONT_CENTER) {
             matrix[FRONT_CENTER][BACK_CENTER] += surround_mix_level * M_SQRT1_2;
         } else
@@ -163,8 +183,20 @@ int avresample_build_matrix(uint64_t in_layout, uint64_t out_layout,
                 matrix[SIDE_RIGHT][BACK_RIGHT] += 1.0;
             }
         } else if (out_layout & AV_CH_FRONT_LEFT) {
-            matrix[FRONT_LEFT ][BACK_LEFT ] += surround_mix_level;
-            matrix[FRONT_RIGHT][BACK_RIGHT] += surround_mix_level;
+            if (matrix_encoding == AV_MATRIX_ENCODING_DOLBY) {
+                matrix[FRONT_LEFT ][BACK_LEFT ] -= surround_mix_level * M_SQRT1_2;
+                matrix[FRONT_LEFT ][BACK_RIGHT] -= surround_mix_level * M_SQRT1_2;
+                matrix[FRONT_RIGHT][BACK_LEFT ] += surround_mix_level * M_SQRT1_2;
+                matrix[FRONT_RIGHT][BACK_RIGHT] += surround_mix_level * M_SQRT1_2;
+            } else if (matrix_encoding == AV_MATRIX_ENCODING_DPLII) {
+                matrix[FRONT_LEFT ][BACK_LEFT ] -= surround_mix_level * SQRT3_2;
+                matrix[FRONT_LEFT ][BACK_RIGHT] -= surround_mix_level * M_SQRT1_2;
+                matrix[FRONT_RIGHT][BACK_LEFT ] += surround_mix_level * M_SQRT1_2;
+                matrix[FRONT_RIGHT][BACK_RIGHT] += surround_mix_level * SQRT3_2;
+            } else {
+                matrix[FRONT_LEFT ][BACK_LEFT ] += surround_mix_level;
+                matrix[FRONT_RIGHT][BACK_RIGHT] += surround_mix_level;
+            }
         } else if (out_layout & AV_CH_FRONT_CENTER) {
             matrix[FRONT_CENTER][BACK_LEFT ] += surround_mix_level * M_SQRT1_2;
             matrix[FRONT_CENTER][BACK_RIGHT] += surround_mix_level * M_SQRT1_2;
@@ -187,8 +219,20 @@ int avresample_build_matrix(uint64_t in_layout, uint64_t out_layout,
             matrix[BACK_CENTER][SIDE_LEFT ] += M_SQRT1_2;
             matrix[BACK_CENTER][SIDE_RIGHT] += M_SQRT1_2;
         } else if (out_layout & AV_CH_FRONT_LEFT) {
-            matrix[FRONT_LEFT ][SIDE_LEFT ] += surround_mix_level;
-            matrix[FRONT_RIGHT][SIDE_RIGHT] += surround_mix_level;
+            if (matrix_encoding == AV_MATRIX_ENCODING_DOLBY) {
+                matrix[FRONT_LEFT ][SIDE_LEFT ] -= surround_mix_level * M_SQRT1_2;
+                matrix[FRONT_LEFT ][SIDE_RIGHT] -= surround_mix_level * M_SQRT1_2;
+                matrix[FRONT_RIGHT][SIDE_LEFT ] += surround_mix_level * M_SQRT1_2;
+                matrix[FRONT_RIGHT][SIDE_RIGHT] += surround_mix_level * M_SQRT1_2;
+            } else if (matrix_encoding == AV_MATRIX_ENCODING_DPLII) {
+                matrix[FRONT_LEFT ][SIDE_LEFT ] -= surround_mix_level * SQRT3_2;
+                matrix[FRONT_LEFT ][SIDE_RIGHT] -= surround_mix_level * M_SQRT1_2;
+                matrix[FRONT_RIGHT][SIDE_LEFT ] += surround_mix_level * M_SQRT1_2;
+                matrix[FRONT_RIGHT][SIDE_RIGHT] += surround_mix_level * SQRT3_2;
+            } else {
+                matrix[FRONT_LEFT ][SIDE_LEFT ] += surround_mix_level;
+                matrix[FRONT_RIGHT][SIDE_RIGHT] += surround_mix_level;
+            }
         } else if (out_layout & AV_CH_FRONT_CENTER) {
             matrix[FRONT_CENTER][SIDE_LEFT ] += surround_mix_level * M_SQRT1_2;
             matrix[FRONT_CENTER][SIDE_RIGHT] += surround_mix_level * M_SQRT1_2;
diff --git a/libavresample/avresample.h b/libavresample/avresample.h
index 65d4d2d6e2..002bec21fb 100644
--- a/libavresample/avresample.h
+++ b/libavresample/avresample.h
@@ -131,12 +131,13 @@ void avresample_free(AVAudioResampleContext **avr);
  *                            the weight of input channel i in output channel o.
  * @param stride              distance between adjacent input channels in the
  *                            matrix array
+ * @param matrix_encoding     matrixed stereo downmix mode (e.g. dplii)
  * @return                    0 on success, negative AVERROR code on failure
  */
 int avresample_build_matrix(uint64_t in_layout, uint64_t out_layout,
                             double center_mix_level, double surround_mix_level,
                             double lfe_mix_level, int normalize, double *matrix,
-                            int stride);
+                            int stride, enum AVMatrixEncoding matrix_encoding);
 
 /**
  * Get the current channel mixing matrix.
diff --git a/libavresample/internal.h b/libavresample/internal.h
index 49ea6a668e..fa9499a8ef 100644
--- a/libavresample/internal.h
+++ b/libavresample/internal.h
@@ -70,6 +70,7 @@ struct AVAudioResampleContext {
     AudioConvert *ac_out;       /**< output sample format conversion context */
     ResampleContext *resample;  /**< resampling context                      */
     AudioMix *am;               /**< channel mixing context                  */
+    enum AVMatrixEncoding matrix_encoding;      /**< matrixed stereo encoding */
 };
 
 #endif /* AVRESAMPLE_INTERNAL_H */
diff --git a/libavresample/options.c b/libavresample/options.c
index 5430c4ddf2..a1a0b0ca21 100644
--- a/libavresample/options.c
+++ b/libavresample/options.c
@@ -52,6 +52,10 @@ static const AVOption options[] = {
     { "phase_shift",            "Resampling Phase Shift",   OFFSET(phase_shift),            AV_OPT_TYPE_INT,    { 10                    }, 0,                    30, /* ??? */           PARAM },
     { "linear_interp",          "Use Linear Interpolation", OFFSET(linear_interp),          AV_OPT_TYPE_INT,    { 0                     }, 0,                    1,                      PARAM },
     { "cutoff",                 "Cutoff Frequency Ratio",   OFFSET(cutoff),                 AV_OPT_TYPE_DOUBLE, { 0.8                   }, 0.0,                  1.0,                    PARAM },
+    { "matrix_encoding",        "Matrixed Stereo Encoding", OFFSET(matrix_encoding),        AV_OPT_TYPE_INT,    { AV_MATRIX_ENCODING_NONE}, AV_MATRIX_ENCODING_NONE,     AV_MATRIX_ENCODING_NB-1, PARAM, "matrix_encoding" },
+        { "none",  "None",               0, AV_OPT_TYPE_CONST, { AV_MATRIX_ENCODING_NONE  }, INT_MIN, INT_MAX, PARAM, "matrix_encoding" },
+        { "dolby", "Dolby",              0, AV_OPT_TYPE_CONST, { AV_MATRIX_ENCODING_DOLBY }, INT_MIN, INT_MAX, PARAM, "matrix_encoding" },
+        { "dplii", "Dolby Pro Logic II", 0, AV_OPT_TYPE_CONST, { AV_MATRIX_ENCODING_DPLII }, INT_MIN, INT_MAX, PARAM, "matrix_encoding" },
     { NULL },
 };
 
diff --git a/libavresample/version.h b/libavresample/version.h
index 6211a56352..63f07f5e84 100644
--- a/libavresample/version.h
+++ b/libavresample/version.h
@@ -21,7 +21,7 @@
 
 #define LIBAVRESAMPLE_VERSION_MAJOR  0
 #define LIBAVRESAMPLE_VERSION_MINOR  0
-#define LIBAVRESAMPLE_VERSION_MICRO  2
+#define LIBAVRESAMPLE_VERSION_MICRO  3
 
 #define LIBAVRESAMPLE_VERSION_INT  AV_VERSION_INT(LIBAVRESAMPLE_VERSION_MAJOR, \
                                                   LIBAVRESAMPLE_VERSION_MINOR, \
diff --git a/libavutil/audioconvert.h b/libavutil/audioconvert.h
index 691c64a9de..7e79097855 100644
--- a/libavutil/audioconvert.h
+++ b/libavutil/audioconvert.h
@@ -101,6 +101,13 @@
 #define AV_CH_LAYOUT_OCTAGONAL         (AV_CH_LAYOUT_5POINT0|AV_CH_BACK_LEFT|AV_CH_BACK_CENTER|AV_CH_BACK_RIGHT)
 #define AV_CH_LAYOUT_STEREO_DOWNMIX    (AV_CH_STEREO_LEFT|AV_CH_STEREO_RIGHT)
 
+enum AVMatrixEncoding {
+    AV_MATRIX_ENCODING_NONE,    /**< no matrixed stereo encoding ("none" in the lavr options) */
+    AV_MATRIX_ENCODING_DOLBY,   /**< Dolby ("dolby" in the lavr options) */
+    AV_MATRIX_ENCODING_DPLII,   /**< Dolby Pro Logic II ("dplii" in the lavr options) */
+    AV_MATRIX_ENCODING_NB       /**< number of encodings; the lavr option range ends at NB-1 */
+};
+
 /**
  * @}
  */

From 29f7490c461431b5c00e496f3e0253c170b3924c Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Tue, 29 May 2012 17:03:40 -0400
Subject: [PATCH 13/14] lavr: add x86-optimized functions for mixing 1-to-2
 fltp with flt coeffs

---
 libavresample/x86/audio_mix.asm    | 34 ++++++++++++++++++++++++++++++
 libavresample/x86/audio_mix_init.c |  9 ++++++++
 2 files changed, 43 insertions(+)

diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm
index 8a4cf061cd..2bc89cac92 100644
--- a/libavresample/x86/audio_mix.asm
+++ b/libavresample/x86/audio_mix.asm
@@ -150,3 +150,37 @@ cglobal mix_2_to_1_s16p_q8, 3,4,6, src, matrix, len, src1
     sub        lend, mmsize/2
     jg .loop
     REP_RET
+
+;-----------------------------------------------------------------------------
+; void ff_mix_1_to_2_fltp_flt(float **src, float **matrix, int len,
+;                             int out_ch, int in_ch);
+;-----------------------------------------------------------------------------
+
+%macro MIX_1_TO_2_FLTP_FLT 0 ; in-place float mix of 1 input channel into 2 output channels
+cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1
+    mov       src1q, [src0q+gprsize]       ; src1 = src[1] (second channel pointer)
+    mov       src0q, [src0q]               ; src0 = src[0] (first channel pointer, also the input)
+    sub       src1q, src0q                 ; src1 = byte distance src[1] - src[0], so one increment of src0 advances both
+    mov    matrix1q, [matrix0q+gprsize]    ; matrix1 = matrix[1]
+    mov    matrix0q, [matrix0q]            ; matrix0 = matrix[0]
+    VBROADCASTSS m2, [matrix0q]            ; m2 = matrix[0][0], presumably in every lane (VBROADCASTSS is defined elsewhere)
+    VBROADCASTSS m3, [matrix1q]            ; m3 = matrix[1][0], likewise
+    ALIGN 16
+.loop:
+    mova         m0, [src0q]               ; load one vector of input samples from channel 0
+    mulps        m1, m0, m3                ; m1 = input * matrix[1][0]
+    mulps        m0, m0, m2                ; m0 = input * matrix[0][0]
+    mova  [src0q      ], m0                ; channel 0 is overwritten with its scaled samples
+    mova  [src0q+src1q], m1                ; channel 1 output goes to src[1] at the same offset
+    add       src0q, mmsize                ; step to the next vector
+    sub        lend, mmsize/4              ; each iteration handles mmsize/4 floats
+    jg .loop                               ; repeat while len is still positive
+    REP_RET
+%endmacro
+
+INIT_XMM sse                               ; build the sse variant with XMM registers
+MIX_1_TO_2_FLTP_FLT
+%if HAVE_AVX
+INIT_YMM avx                               ; build the avx variant with YMM registers
+MIX_1_TO_2_FLTP_FLT
+%endif
diff --git a/libavresample/x86/audio_mix_init.c b/libavresample/x86/audio_mix_init.c
index fa204d6d36..aede260190 100644
--- a/libavresample/x86/audio_mix_init.c
+++ b/libavresample/x86/audio_mix_init.c
@@ -35,6 +35,11 @@ extern void ff_mix_2_to_1_s16p_flt_sse4(int16_t **src, float **matrix, int len,
 extern void ff_mix_2_to_1_s16p_q8_sse2(int16_t **src, int16_t **matrix,
                                        int len, int out_ch, int in_ch);
 
+extern void ff_mix_1_to_2_fltp_flt_sse(float **src, float **matrix, int len,
+                                       int out_ch, int in_ch);
+extern void ff_mix_1_to_2_fltp_flt_avx(float **src, float **matrix, int len,
+                                       int out_ch, int in_ch);
+
 av_cold void ff_audio_mix_init_x86(AudioMix *am)
 {
 #if HAVE_YASM
@@ -43,6 +48,8 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am)
     if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) {
         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                               2, 1, 16, 8, "SSE", ff_mix_2_to_1_fltp_flt_sse);
+        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
+                              1, 2, 16, 4, "SSE", ff_mix_1_to_2_fltp_flt_sse);
     }
     if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
@@ -57,6 +64,8 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am)
     if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                               2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
+        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
+                              1, 2, 32, 8, "AVX", ff_mix_1_to_2_fltp_flt_avx);
     }
 #endif
 }

From f61ce90caa909d131ea6ec205823568a38115529 Mon Sep 17 00:00:00 2001
From: Justin Ruggles <justin.ruggles@gmail.com>
Date: Tue, 29 May 2012 17:03:56 -0400
Subject: [PATCH 14/14] lavr: add x86-optimized functions for mixing 1-to-2
 s16p with flt coeffs

---
 libavresample/x86/audio_mix.asm    | 47 ++++++++++++++++++++++++++++++
 libavresample/x86/audio_mix_init.c | 13 +++++++++
 2 files changed, 60 insertions(+)

diff --git a/libavresample/x86/audio_mix.asm b/libavresample/x86/audio_mix.asm
index 2bc89cac92..4b0434dd6d 100644
--- a/libavresample/x86/audio_mix.asm
+++ b/libavresample/x86/audio_mix.asm
@@ -184,3 +184,50 @@ MIX_1_TO_2_FLTP_FLT
 INIT_YMM avx
 MIX_1_TO_2_FLTP_FLT
 %endif
+
+;-----------------------------------------------------------------------------
+; void ff_mix_1_to_2_s16p_flt(int16_t **src, float **matrix, int len,
+;                             int out_ch, int in_ch);
+;-----------------------------------------------------------------------------
+
+%macro MIX_1_TO_2_S16P_FLT 0 ; in-place int16 mix of 1 input channel into 2 output channels using float coefficients
+cglobal mix_1_to_2_s16p_flt, 3,5,6, src0, matrix0, len, src1, matrix1
+    mov       src1q, [src0q+gprsize]       ; src1 = src[1] (second channel pointer)
+    mov       src0q, [src0q]               ; src0 = src[0] (first channel pointer, also the input)
+    sub       src1q, src0q                 ; src1 = byte distance src[1] - src[0], so one increment of src0 advances both
+    mov    matrix1q, [matrix0q+gprsize]    ; matrix1 = matrix[1]
+    mov    matrix0q, [matrix0q]            ; matrix0 = matrix[0]
+    VBROADCASTSS m4, [matrix0q]            ; m4 = matrix[0][0], presumably in every lane (VBROADCASTSS is defined elsewhere)
+    VBROADCASTSS m5, [matrix1q]            ; m5 = matrix[1][0], likewise
+    ALIGN 16
+.loop:
+    mova         m0, [src0q]               ; load one vector of int16 input samples from channel 0
+    S16_TO_S32_SX 0, 2                     ; presumably sign-extends m0 to int32 in m0 and m2 (macro defined elsewhere - confirm)
+    cvtdq2ps     m0, m0                    ; int32 -> float
+    cvtdq2ps     m2, m2                    ; int32 -> float
+    mulps        m1, m0, m5                ; m1 = m0 * matrix[1][0]
+    mulps        m0, m0, m4                ; m0 = m0 * matrix[0][0]
+    mulps        m3, m2, m5                ; m3 = m2 * matrix[1][0]
+    mulps        m2, m2, m4                ; m2 = m2 * matrix[0][0]
+    cvtps2dq     m0, m0                    ; float -> int32
+    cvtps2dq     m1, m1                    ; float -> int32
+    cvtps2dq     m2, m2                    ; float -> int32
+    cvtps2dq     m3, m3                    ; float -> int32
+    packssdw     m0, m2                    ; channel 0 result: pack back to int16 with signed saturation
+    packssdw     m1, m3                    ; channel 1 result: pack back to int16 with signed saturation
+    mova  [src0q      ], m0                ; channel 0 is overwritten with its scaled samples
+    mova  [src0q+src1q], m1                ; channel 1 output goes to src[1] at the same offset
+    add       src0q, mmsize                ; step to the next vector
+    sub        lend, mmsize/2              ; each iteration handles mmsize/2 int16 samples
+    jg .loop                               ; repeat while len is still positive
+    REP_RET
+%endmacro
+
+INIT_XMM sse2                              ; build the sse2 variant with XMM registers
+MIX_1_TO_2_S16P_FLT
+INIT_XMM sse4                              ; build the sse4 variant with XMM registers
+MIX_1_TO_2_S16P_FLT
+%if HAVE_AVX
+INIT_XMM avx                               ; avx variant stays on XMM here; presumably because 256-bit integer ops need AVX2 - confirm
+MIX_1_TO_2_S16P_FLT
+%endif
diff --git a/libavresample/x86/audio_mix_init.c b/libavresample/x86/audio_mix_init.c
index aede260190..b8f3a90eef 100644
--- a/libavresample/x86/audio_mix_init.c
+++ b/libavresample/x86/audio_mix_init.c
@@ -40,6 +40,13 @@ extern void ff_mix_1_to_2_fltp_flt_sse(float **src, float **matrix, int len,
 extern void ff_mix_1_to_2_fltp_flt_avx(float **src, float **matrix, int len,
                                        int out_ch, int in_ch);
 
+extern void ff_mix_1_to_2_s16p_flt_sse2(int16_t **src, float **matrix, int len,
+                                        int out_ch, int in_ch);
+extern void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix, int len,
+                                        int out_ch, int in_ch);
+extern void ff_mix_1_to_2_s16p_flt_avx (int16_t **src, float **matrix, int len,
+                                        int out_ch, int in_ch);
+
 av_cold void ff_audio_mix_init_x86(AudioMix *am)
 {
 #if HAVE_YASM
@@ -56,16 +63,22 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am)
                               2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_flt_sse2);
         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_Q8,
                               2, 1, 16, 8, "SSE2", ff_mix_2_to_1_s16p_q8_sse2);
+        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
+                              1, 2, 16, 8, "SSE2", ff_mix_1_to_2_s16p_flt_sse2);
     }
     if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) {
         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
                               2, 1, 16, 8, "SSE4", ff_mix_2_to_1_s16p_flt_sse4);
+        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
+                              1, 2, 16, 8, "SSE4", ff_mix_1_to_2_s16p_flt_sse4);
     }
     if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) {
         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                               2, 1, 32, 16, "AVX", ff_mix_2_to_1_fltp_flt_avx);
         ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,
                               1, 2, 32, 8, "AVX", ff_mix_1_to_2_fltp_flt_avx);
+        ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,
+                              1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx);
     }
 #endif
 }