From e93f767bec1c4b58ac24bd59143c0030b9555426 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 22 Jun 2016 16:23:34 +0300 Subject: crypto: omap-sham - use runtime_pm autosuspend for clock handling Calling the runtime PM API for every block causes a serious performance hit to crypto operations that are done on a long buffer. As crypto is performed on a page boundary, encrypting large buffers can cause a series of crypto operations divided by page. The runtime PM API is also called that many times. Convert the driver to use runtime_pm autosuspend instead, with a default timeout value of 1 second. This results in up to a ~50% speedup. Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/crypto/omap-sham.c') diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 63464e86f2b1..887bc32b7993 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -100,6 +100,8 @@ #define DEFAULT_TIMEOUT_INTERVAL HZ +#define DEFAULT_AUTOSUSPEND_DELAY 1000 + /* mostly device flags */ #define FLAGS_BUSY 0 #define FLAGS_FINAL 1 @@ -999,7 +1001,8 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) dd->flags &= ~(BIT(FLAGS_BUSY) | BIT(FLAGS_FINAL) | BIT(FLAGS_CPU) | BIT(FLAGS_DMA_READY) | BIT(FLAGS_OUTPUT_READY)); - pm_runtime_put(dd->dev); + pm_runtime_mark_last_busy(dd->dev); + pm_runtime_put_autosuspend(dd->dev); if (req->base.complete) req->base.complete(&req->base, err); @@ -1946,6 +1949,9 @@ static int omap_sham_probe(struct platform_device *pdev) dd->flags |= dd->pdata->flags; + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY); + pm_runtime_enable(dev); pm_runtime_irq_safe(dev); -- cgit From 65e7a549af295cb2034f17e99211b97e9d02cbee Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 22 Jun 2016 16:23:35 +0300 Subject: crypto: omap-sham - change queue size from 1 to 10 
Change crypto queue size from 1 to 10 for omap SHA driver. This should allow clients to enqueue requests more effectively to avoid serializing whole crypto sequences, giving extra performance. Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/crypto/omap-sham.c') diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 887bc32b7993..8090db0de466 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -175,7 +175,7 @@ struct omap_sham_ctx { struct omap_sham_hmac_ctx base[0]; }; -#define OMAP_SHAM_QUEUE_LENGTH 1 +#define OMAP_SHAM_QUEUE_LENGTH 10 struct omap_sham_algs_info { struct ahash_alg *algs_list; -- cgit From b973eaab68db858cb42f5283b1b0ed6773d8fdd9 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Wed, 22 Jun 2016 16:23:36 +0300 Subject: crypto: omap - do not call dmaengine_terminate_all The extra call to dmaengine_terminate_all is not needed, as the DMA is not running at this point. This improves performance slightly. 
Signed-off-by: Lokesh Vutla Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 2 -- drivers/crypto/omap-sham.c | 1 - 2 files changed, 3 deletions(-) (limited to 'drivers/crypto/omap-sham.c') diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 4a0e6a545ba2..8178632de788 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -528,8 +528,6 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) omap_aes_dma_stop(dd); - dmaengine_terminate_all(dd->dma_lch_in); - dmaengine_terminate_all(dd->dma_lch_out); return 0; } diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 8090db0de466..3321f003f465 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -815,7 +815,6 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - dmaengine_terminate_all(dd->dma_lch); if (ctx->flags & BIT(FLAGS_SG)) { dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); -- cgit From 85e0687f8fac9032681b163a17f806b52205922e Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Wed, 22 Jun 2016 16:23:37 +0300 Subject: crypto: omap-sham - set sw fallback to 240 bytes Adds software fallback support for small crypto requests. In these cases, it is undesirable to use DMA, as setting it up is itself a rather heavy operation. Gives about 40% extra performance in the IPsec use case. 
Signed-off-by: Bin Liu [t-kristo@ti.com: dropped the extra traces, updated some comments on the code] Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/crypto/omap-sham.c') diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 3321f003f465..ae6f841f81ff 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -1095,7 +1095,7 @@ static int omap_sham_update(struct ahash_request *req) ctx->offset = 0; if (ctx->flags & BIT(FLAGS_FINUP)) { - if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 9) { + if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 240) { /* * OMAP HW accel works only with buffers >= 9 * will switch to bypass in final() @@ -1151,9 +1151,13 @@ static int omap_sham_final(struct ahash_request *req) if (ctx->flags & BIT(FLAGS_ERROR)) return 0; /* uncompleted hash is not needed */ - /* OMAP HW accel works only with buffers >= 9 */ - /* HMAC is always >= 9 because ipad == block size */ - if ((ctx->digcnt + ctx->bufcnt) < 9) + /* + * OMAP HW accel works only with buffers >= 9. + * HMAC is always >= 9 because ipad == block size. + * If buffersize is less than 240, we use fallback SW encoding, + * as using DMA + HW in this case doesn't provide any benefit. + */ + if ((ctx->digcnt + ctx->bufcnt) < 240) return omap_sham_final_shash(req); else if (ctx->bufcnt) return omap_sham_enqueue(req, OP_FINAL); -- cgit From eb3547859d73629c888825d6b928f2d0dba5af41 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Thu, 30 Jun 2016 14:04:11 -0500 Subject: crypto: omap-sham - increase cra_proirity to 400 The arm-neon-sha implementations have cra_priority of 150...300, so increase omap-sham priority to 400 to ensure it is on top of any software alg. 
Signed-off-by: Bin Liu Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/crypto/omap-sham.c') diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index ae6f841f81ff..7fe4eef12fe2 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -1334,7 +1334,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "sha1", .cra_driver_name = "omap-sha1", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1357,7 +1357,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "md5", .cra_driver_name = "omap-md5", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1381,7 +1381,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "hmac(sha1)", .cra_driver_name = "omap-hmac-sha1", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1406,7 +1406,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "hmac(md5)", .cra_driver_name = "omap-hmac-md5", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1434,7 +1434,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "sha224", .cra_driver_name = "omap-sha224", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1456,7 +1456,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "sha256", .cra_driver_name = "omap-sha256", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ 
-1479,7 +1479,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "hmac(sha224)", .cra_driver_name = "omap-hmac-sha224", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1503,7 +1503,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "hmac(sha256)", .cra_driver_name = "omap-hmac-sha256", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1529,7 +1529,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "sha384", .cra_driver_name = "omap-sha384", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1551,7 +1551,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "sha512", .cra_driver_name = "omap-sha512", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1574,7 +1574,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "hmac(sha384)", .cra_driver_name = "omap-hmac-sha384", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1598,7 +1598,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "hmac(sha512)", .cra_driver_name = "omap-hmac-sha512", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, -- cgit