django-cachalot/benchmark.py

import io
import os
import platform
import re
import sqlite3
from collections import OrderedDict
from datetime import datetime
from random import choice
from subprocess import check_output
from time import perf_counter, time

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings")
import django
django.setup()

import matplotlib.pyplot as plt
import pandas as pd
import psycopg2
from django.conf import settings
from django.contrib.auth.models import Group, User
from django.core.cache import caches
from django.db import connection, connections
from django.test.utils import CaptureQueriesContext, override_settings
from django.utils.encoding import force_text
from MySQLdb import _mysql

import cachalot
from cachalot.api import invalidate
from cachalot.tests.models import Test


# Directory that receives this run's output files; named after today's date.
RESULTS_PATH = f"benchmark/docs/{datetime.now().date()}/"
# Benchmark scenarios, in the order they are executed and plotted.
CONTEXTS = ("Control", "Cold cache", "Hot cache")
# NOTE(review): not referenced in the visible code — confirm before removing.
DIVIDER = "divider"

# Path whose backing disk model is reported in the conditions table on Linux
# (presumably where the database servers store their data — TODO confirm).
LINUX_DATA_PATH = "/var/lib/"
# Parses one line of ``lsblk -Po MODEL,MOUNTPOINT`` output.
DISK_DATA_RE = re.compile(r'^MODEL="(.*)" MOUNTPOINT="(.*)"$')


def get_disk_model_for_path_linux(path):
    """Return the model name of the disk that backs ``path`` on Linux.

    The output of ``lsblk -Po MODEL,MOUNTPOINT`` is scanned line by line.
    Rows with an empty MODEL reuse the most recent non-empty model seen
    above them (presumably partitions listed under their parent disk —
    TODO confirm against ``lsblk`` output).

    :param path: Filesystem path to look up.
    :returns: The model string of the longest mount point that is a prefix
        of ``path``; ``None`` when no mount point matches (the model itself
        may also be ``None`` if no model was seen before that mount point).
    """
    out = force_text(check_output(["lsblk", "-Po", "MODEL,MOUNTPOINT"]))
    mount_points = []
    previous_model = None
    for line in out.split("\n"):
        match = DISK_DATA_RE.match(line)
        if match is None:
            # Blank or unexpectedly formatted line: skip it rather than
            # crashing with AttributeError on ``None.groups()``.
            continue
        model, mount_point = match.groups()
        if model:
            previous_model = model.strip()
        if mount_point:
            mount_points.append((previous_model, mount_point))
    # Longest mount point first, so the most specific match wins.
    mount_points = sorted(mount_points, key=lambda t: -len(t[1]))
    for model, mount_point in mount_points:
        if path.startswith(mount_point):
            return model
    return None


def write_conditions():
    """Write the benchmark conditions to ``conditions.rst`` in RESULTS_PATH.

    Collects hardware details (CPU, RAM, disk model), the OS release and the
    versions of Python, Django, cachalot, the database servers, the cache
    servers and the database drivers, then writes them as a two-column
    reStructuredText table below a short introduction.

    :raises django.db.utils.OperationalError: when the "postgresql" or
        "mysql" connection cannot be used; the message explains how to
        create the missing database and the original error is chained.
    """
    versions = OrderedDict()
    distribution = platform.uname()

    # Linux
    if distribution.system == "Linux":
        # CPU
        with open("/proc/cpuinfo") as f:
            versions["CPU"] = re.search(
                r"^model name\s+: (.+)$", f.read(), flags=re.MULTILINE
            ).group(1)
        # RAM
        with open("/proc/meminfo") as f:
            versions["RAM"] = re.search(
                r"^MemTotal:\s+(.+)$", f.read(), flags=re.MULTILINE
            ).group(1)
        # Disk Model (may be None when no mount point matches the path)
        versions["Disk"] = get_disk_model_for_path_linux(LINUX_DATA_PATH)
        # OS
        versions["Linux distribution"] = f"{distribution.system} {distribution.release}"
    # Darwin (every non-Linux system takes this branch)
    else:
        # CPU
        versions["CPU"] = os.popen("sysctl -n machdep.cpu.brand_string").read().rstrip("\n")
        # RAM
        versions["RAM"] = os.popen("sysctl -n hw.memsize").read().rstrip("\n")
        # Disk Model
        versions["DISK"] = os.popen(
            "diskutil info /dev/disk0 | grep 'Device / Media Name'"
        ).read().split(":")[1].rstrip("\n").lstrip(" ")
        # OS
        versions["OS"] = f"{distribution.system} {distribution.release}"

    versions.update(
        (
            ("Python", platform.python_version()),
            ("Django", django.__version__),
            ("cachalot", cachalot.__version__),
            ("sqlite", sqlite3.sqlite_version),
        )
    )
    # PostgreSQL
    try:
        with connections["postgresql"].cursor() as cursor:
            cursor.execute("SELECT version();")
            versions["PostgreSQL"] = re.match(
                r"^PostgreSQL\s+(\S+)\s", cursor.fetchone()[0]
            ).group(1)
    except django.db.utils.OperationalError as exc:
        # Chain explicitly so the driver's own error stays visible.
        raise django.db.utils.OperationalError(
            "You need a PostgreSQL DB called \"cachalot\" first. "
            "Login with \"psql -U postgres -h localhost\" and run: "
            "CREATE DATABASE cachalot;"
        ) from exc
    # MySQL
    try:
        with connections["mysql"].cursor() as cursor:
            cursor.execute("SELECT version();")
            versions["MySQL"] = cursor.fetchone()[0].split("-")[0]
    except django.db.utils.OperationalError as exc:
        raise django.db.utils.OperationalError(
            "You need a MySQL DB called \"cachalot\" first. "
            "Login with \"mysql -u root\" and run: CREATE DATABASE cachalot;"
        ) from exc
    # Redis
    out = force_text(check_output(["redis-cli", "INFO", "server"])).replace("\r", "")
    versions["Redis"] = re.search(
        r"^redis_version:([\d\.]+)$", out, flags=re.MULTILINE
    ).group(1)
    # memcached
    out = force_text(check_output(["memcached", "-h"]))
    versions["memcached"] = re.match(
        r"^memcached ([\d\.]+)$", out, flags=re.MULTILINE
    ).group(1)

    versions.update(
        (
            ("psycopg2", psycopg2.__version__.split()[0]),
            ("mysqlclient", _mysql.__version__),
        )
    )

    # Explicit UTF-8 so hardware names with non-ASCII characters can be
    # written regardless of the locale's default encoding.
    with io.open(
        os.path.join(RESULTS_PATH, "conditions.rst"), "w", encoding="utf-8"
    ) as f:
        f.write(
            "In this benchmark, a small database is generated, "
            "and each test is executed %s times "
            "under the following conditions:\n\n" % Benchmark.n
        )

        def write_table_sep(char="="):
            # Column widths of the reStructuredText simple table: 20 and 50.
            f.write((char * 20) + " " + (char * 50) + "\n")

        write_table_sep()
        for k, v in versions.items():
            # str() because a value can be None (e.g. an unresolved disk
            # model), which would otherwise raise TypeError on concatenation.
            f.write(k.ljust(20) + " " + str(v) + "\n")
        write_table_sep()


class AssertNumQueries(CaptureQueriesContext):
    """Capture the queries run inside the block and report a wrong count.

    Despite the name, a mismatch is only printed; nothing is raised.
    """

    def __init__(self, n, using=None):
        # ``n`` is the expected number of queries; ``using`` is a database
        # alias, or None for the default connection.
        self.n = n
        self.using = using
        super().__init__(self.get_connection())

    def get_connection(self):
        """Return the connection for ``self.using`` (default one if None)."""
        return connection if self.using is None else connections[self.using]

    def __exit__(self, exc_type, exc_val, exc_tb):
        super().__exit__(exc_type, exc_val, exc_tb)
        captured = len(self)
        if captured == self.n:
            return
        print(
            "The amount of queries should be %s, but %s were captured."
            % (self.n, captured)
        )


class Benchmark(object):
    """Time a fixed set of ORM queries with and without cachalot.

    Every query is timed ``n`` times in each of the three CONTEXTS, for
    every database in ``settings.DATABASES`` combined with every cache in
    ``settings.CACHES``. The raw timings are saved as CSV and summarised
    as reStructuredText snippets and SVG bar charts in RESULTS_PATH.
    """

    # Number of timed repetitions per query and per context.
    n = 20

    def __init__(self):
        # One record (dict) per timed execution; see ``bench_once``.
        self.data = []

    def bench_once(self, context, num_queries, invalidate_before=False):
        """Time ``self.query_function`` ``self.n`` times and record each run.

        :param context: Label stored with each record (one of CONTEXTS).
        :param num_queries: Number of SQL queries each run is expected to
            issue; a mismatch is reported by ``AssertNumQueries``.
        :param invalidate_before: When true, invalidate cachalot's data for
            the current database before every run.
        """
        for _ in range(self.n):
            if invalidate_before:
                invalidate(db_alias=self.db_alias)
            with AssertNumQueries(num_queries, using=self.db_alias):
                # perf_counter() is monotonic and high-resolution, unlike
                # the wall clock, which can jump while a run is in progress.
                start = perf_counter()
                self.query_function(self.db_alias)
                end = perf_counter()
            self.data.append(
                {
                    "query": self.query_name,
                    "time": end - start,
                    "context": context,
                    "db": self.db_vendor,
                    "cache": self.cache_name,
                }
            )

    def benchmark(self, query_str, to_list=True, num_queries=1):
        """Benchmark one query expression in all three contexts.

        :param query_str: Python source appended to
            ``Test.objects.using(using)``, e.g. ``".count()"``.
        :param to_list: Wrap the expression in ``list()`` so that the
            queryset is actually evaluated.
        :param num_queries: SQL queries expected when the result is not
            served from the cache.
        """
        # Clears the cache before a single benchmark to ensure the same
        # conditions across single benchmarks.
        caches[settings.CACHALOT_CACHE].clear()

        self.query_name = query_str
        query_str = "Test.objects.using(using)" + query_str
        if to_list:
            query_str = "list(%s)" % query_str
        # SECURITY NOTE: eval() is only acceptable here because every
        # query_str visible in this class is a literal from
        # execute_benchmark(); never pass external input to this method.
        self.query_function = eval("lambda using: " + query_str)

        # Control: cachalot disabled.
        with override_settings(CACHALOT_ENABLED=False):
            self.bench_once(CONTEXTS[0], num_queries)

        # Cold cache: invalidated before every run.
        self.bench_once(CONTEXTS[1], num_queries, invalidate_before=True)

        # Hot cache: no SQL query is expected at all.
        self.bench_once(CONTEXTS[2], 0)

    def execute_benchmark(self):
        """Run the full list of benchmarked queries for the current setup."""
        self.benchmark(".count()", to_list=False)
        self.benchmark(".first()", to_list=False)
        self.benchmark("[:10]")
        self.benchmark("[5000:5010]")
        self.benchmark(".filter(name__icontains='e')[0:10]")
        self.benchmark(".filter(name__icontains='e')[5000:5010]")
        self.benchmark(".order_by('owner')[0:10]")
        self.benchmark(".order_by('owner')[5000:5010]")
        self.benchmark(".select_related('owner')[0:10]")
        self.benchmark(".select_related('owner')[5000:5010]")
        self.benchmark(".prefetch_related('owner__groups')[0:10]", num_queries=3)
        self.benchmark(".prefetch_related('owner__groups')[5000:5010]", num_queries=3)

    def run(self):
        """Benchmark every database/cache pair, then write all outputs."""
        for db_alias in settings.DATABASES:
            self.db_alias = db_alias
            self.db_vendor = connections[self.db_alias].vendor
            print("Benchmarking %s…" % self.db_vendor)
            for cache_alias in settings.CACHES:
                cache = caches[cache_alias]
                # Drops the last five characters of the class name
                # (presumably a "Cache" suffix — TODO confirm).
                self.cache_name = cache.__class__.__name__[:-5].lower()
                with override_settings(CACHALOT_CACHE=cache_alias):
                    self.execute_benchmark()

        self.df = pd.DataFrame.from_records(self.data)
        # RESULTS_PATH is nested; makedirs also creates missing parents and
        # does not fail when the directory already exists.
        os.makedirs(RESULTS_PATH, exist_ok=True)
        self.df.to_csv(os.path.join(RESULTS_PATH, "data.csv"))

        # Shared x-axis limit so that all charts are visually comparable.
        self.xlim = (0, self.df["time"].max() * 1.01)
        self.output("db")
        self.output("cache")

    def output(self, param):
        """Aggregate the timings by ``param`` and produce all its outputs.

        :param param: Column of ``self.df`` to compare, ``"db"`` or
            ``"cache"``.
        """
        # Detailed view: rows are contexts, columns are
        # (param value, query) pairs.
        gp = self.df.groupby(["context", "query", param])["time"]
        self.means = gp.mean().unstack().unstack().reindex(CONTEXTS)
        los = self.means - gp.min().unstack().unstack().reindex(CONTEXTS)
        ups = gp.max().unstack().unstack().reindex(CONTEXTS) - self.means
        # Asymmetric error bars: [distances to min, distances to max].
        self.errors = {
            key: {
                subkey: [
                    [los[key][subkey][context] for context in self.means.index],
                    [ups[key][subkey][context] for context in self.means.index],
                ]
                for subkey in self.means.columns.levels[1]
            }
            for key in self.means.columns.levels[0]
        }
        self.get_perfs(param)
        self.plot_detail(param)

        # General view: the same statistics with all queries pooled.
        gp = self.df.groupby(["context", param])["time"]
        self.means = gp.mean().unstack().reindex(CONTEXTS)
        los = self.means - gp.min().unstack().reindex(CONTEXTS)
        ups = gp.max().unstack().reindex(CONTEXTS) - self.means
        self.errors = [
            [
                [los[key][context] for context in self.means.index],
                [ups[key][context] for context in self.means.index],
            ]
            for key in self.means
        ]
        self.plot_general(param)

    def get_perfs(self, param):
        """Print and save the cold-cache and hot-cache ratios per value.

        For each value of ``param``, the per-context means are averaged
        over all queries; the first ratio is cold cache over control, the
        second is control over hot cache.
        """
        # UTF-8 is required: the summary line contains the "×" character.
        with io.open(
            os.path.join(RESULTS_PATH, param + "_results.rst"),
            "w",
            encoding="utf-8",
        ) as f:
            for v in self.means.columns.levels[0]:
                g = self.means[v].mean(axis=1)
                perf = "%s is %.1f× slower then %.1f× faster" % (
                    v.ljust(10),
                    g[CONTEXTS[1]] / g[CONTEXTS[0]],
                    g[CONTEXTS[0]] / g[CONTEXTS[2]],
                )
                print(perf)
                f.write("- %s\n" % perf)

    def plot_detail(self, param):
        """Save one SVG per value of ``param`` with a subplot per query."""
        for v in self.means.columns.levels[0]:
            plt.figure()
            axes = self.means[v].plot(
                kind="barh",
                xerr=self.errors[v],
                xlim=self.xlim,
                figsize=(15, 15),
                subplots=True,
                # 6 x 2 grid: room for the 12 queries of execute_benchmark().
                layout=(6, 2),
                sharey=True,
                legend=False,
            )
            plt.gca().invert_yaxis()
            for row in axes:
                for ax in row:
                    ax.xaxis.grid(True)
                    ax.set_ylabel("")
                    ax.set_xlabel("Time (s)")
            plt.savefig(os.path.join(RESULTS_PATH, "%s_%s.svg" % (param, v)))

    def plot_general(self, param):
        """Save a single SVG comparing the values of ``param`` per context."""
        plt.figure()
        ax = self.means.plot(kind="barh", xerr=self.errors, xlim=self.xlim)
        ax.invert_yaxis()
        ax.xaxis.grid(True)
        ax.set_ylabel("")
        ax.set_xlabel("Time (s)")
        plt.savefig(os.path.join(RESULTS_PATH, "%s.svg" % param))


def create_data(using):
    """Fill the database behind alias ``using`` with benchmark fixtures.

    Creates 50 users and 10 groups, adds two randomly chosen groups to
    every user, then creates 10000 ``Test`` rows with random owners.
    """
    new_users = [User(username="user%d" % idx) for idx in range(50)]
    User.objects.using(using).bulk_create(new_users)

    new_groups = [Group(name="test%d" % idx) for idx in range(10)]
    Group.objects.using(using).bulk_create(new_groups)

    # Re-read the rows from the database before using them in relations.
    all_groups = list(Group.objects.using(using))
    for user in User.objects.using(using):
        first_group = choice(all_groups)
        second_group = choice(all_groups)
        user.groups.add(first_group, second_group)

    owners = list(User.objects.using(using))
    rows = [
        Test(name="test%d" % idx, owner=choice(owners)) for idx in range(10000)
    ]
    Test.objects.using(using).bulk_create(rows)


if __name__ == "__main__":
    # RESULTS_PATH is nested; makedirs also creates missing parents and does
    # not fail when the directory already exists.
    os.makedirs(RESULTS_PATH, exist_ok=True)

    write_conditions()

    # Maps each alias whose test database was created to its original name,
    # which destroy_test_db() needs to restore the connection settings.
    old_db_names = {}
    try:
        for alias in connections:
            conn = connections[alias]
            # Read the name first: create_test_db() is given no way to
            # return it, and teardown below needs the original value.
            old_name = conn.settings_dict["NAME"]
            conn.creation.create_test_db(autoclobber=True)
            # Recorded only after a successful creation, so that teardown
            # never targets a test database that does not exist.
            old_db_names[alias] = old_name

            print("Populating %s…" % conn.vendor)
            create_data(alias)

        Benchmark().run()
    finally:
        # Always drop the test databases, even if population or the
        # benchmark itself failed.
        for alias, old_name in old_db_names.items():
            connections[alias].creation.destroy_test_db(old_name)
-												Adds benchmark conditions.

											
										
										
											2015-02-16 20:25:19 +00:00
+								import io
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								import os
-												Adds benchmark conditions.

											
										
										
											2015-02-16 20:25:19 +00:00
+								import platform
 								import re
 								import sqlite3
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								from collections import OrderedDict
 								from datetime import datetime
 								from random import choice
-												Adds benchmark conditions.

											
										
										
											2015-02-16 20:25:19 +00:00
+								from subprocess import check_output
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								from time import time
-												Adds benchmark results to the docs.

											
										
										
											2014-12-14 09:46:52 +00:00
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings")
-												Fixes Django 1.7 tests.

											
										
										
											2014-12-07 03:16:07 +00:00
+								import django
-												Fixes the benchmark by setting up django before importing a model.

Otherwise, Django thinks the Test model is from a 'tests' application instead of 'cachalot'.

											
										
										
											2015-10-05 19:06:15 +00:00
+								django.setup()
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								import matplotlib.pyplot as plt
 								import pandas as pd
 								import psycopg2
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								from django.conf import settings
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								from django.contrib.auth.models import Group, User
-												Fixes a removed function in Django 1.9.

											
										
										
											2015-12-18 11:37:21 +00:00
+								from django.core.cache import caches
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								from django.db import connection, connections
-												Simplifies cachalot_settings and forbids its modification.

											
										
										
											2014-12-08 18:43:43 +00:00
+								from django.test.utils import CaptureQueriesContext, override_settings
-												Adds Python 3 compatibility to the benchmark.

											
										
										
											2015-03-15 14:12:05 +00:00
+								from django.utils.encoding import force_text
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								from MySQLdb import _mysql
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
-												Adds benchmark conditions.

											
										
										
											2015-02-16 20:25:19 +00:00
+								import cachalot
-												Merges invalidate_all, invalidate_tables and invalidate_models, while optimising it.

											
										
										
											2015-10-05 20:31:47 +00:00
+								from cachalot.api import invalidate
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								from cachalot.tests.models import Test
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								RESULTS_PATH = f"benchmark/docs/{datetime.now().date()}/"
 								CONTEXTS = ("Control", "Cold cache", "Hot cache")
 								DIVIDER = "divider"
 								LINUX_DATA_PATH = "/var/lib/"
-												Adds the disk model name to the benchmark conditions.

											
										
										
											2016-01-11 20:06:22 +00:00
+								DISK_DATA_RE = re.compile(r'^MODEL="(.*)" MOUNTPOINT="(.*)"$')
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								def get_disk_model_for_path_linux(path):
 								    out = force_text(check_output(["lsblk", "-Po", "MODEL,MOUNTPOINT"]))
-												Adds the disk model name to the benchmark conditions.

											
										
										
											2016-01-11 20:06:22 +00:00
+								    mount_points = []
 								    previous_model = None
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								    for model, mount_point in [
 								        DISK_DATA_RE.match(line).groups() for line in out.split("\n") if line
 								    ]:
-												Adds the disk model name to the benchmark conditions.

											
										
										
											2016-01-11 20:06:22 +00:00
+								        if model:
 								            previous_model = model.strip()
 								        if mount_point:
 								            mount_points.append((previous_model, mount_point))
 								    mount_points = sorted(mount_points, key=lambda t: -len(t[1]))
 								    for model, mount_point in mount_points:
 								        if path.startswith(mount_point):
 								            return model
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
-												Adds benchmark conditions.

											
										
										
											2015-02-16 20:25:19 +00:00
+								def write_conditions():
 								    versions = OrderedDict()
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								    distribution = platform.uname()
 								    # Linux
 								    if distribution.system == "Linux":
 								        # CPU
 								        with open("/proc/cpuinfo") as f:
 								            versions["CPU"] = re.search(
 								                r"^model name\s+: (.+)$", f.read(), flags=re.MULTILINE
 								            ).group(1)
 								        # RAM
 								        with open("/proc/meminfo") as f:
 								            versions["RAM"] = re.search(
 								                r"^MemTotal:\s+(.+)$", f.read(), flags=re.MULTILINE
 								            ).group(1)
 								        # Disk Model
 								        versions.update((("Disk", get_disk_model_for_path_linux(LINUX_DATA_PATH)),))
 								        # OS
 								        versions["Linux distribution"] = f"{distribution.system} {distribution.release}"
 								    # Darwin
-												Adds benchmark conditions.

											
										
										
											2015-02-16 20:25:19 +00:00
+								    else:
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        # CPU
 								        versions["CPU"] = os.popen("sysctl -n machdep.cpu.brand_string").read().rstrip("\n")
 								        # RAM
 								        versions["RAM"] = os.popen("sysctl -n hw.memsize").read().rstrip("\n")
 								        # Disk Model
 								        versions["DISK"] = os.popen(
 								            "diskutil info /dev/disk0 | grep 'Device / Media Name'"
 								        ).read().split(":")[1].rstrip("\n").lstrip(" ")
 								        # OS
 								        versions["OS"] = f"{distribution.system} {distribution.release}"
 								    versions.update(
 								        (
 								            ("Python", platform.python_version()),
 								            ("Django", django.__version__),
 								            ("cachalot", cachalot.__version__),
 								            ("sqlite", sqlite3.sqlite_version),
 								        )
 								    )
-												Adds benchmark conditions.

											
										
										
											2015-02-16 20:25:19 +00:00
+								    # PostgreSQL
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								    try:
 								        with connections["postgresql"].cursor() as cursor:
 								            cursor.execute("SELECT version();")
 								            versions["PostgreSQL"] = re.match(
 								                r"^PostgreSQL\s+(\S+)\s", cursor.fetchone()[0]
 								            ).group(1)
 								    except django.db.utils.OperationalError:
 								        raise django.db.utils.OperationalError(
 								            "You need a PostgreSQL DB called \"cachalot\" first. "
 								            "Login with \"psql -U postgres -h localhost\" and run: "
 								            "CREATE DATABASE cachalot;"
 								        )
-												Adds benchmark conditions.

											
										
										
											2015-02-16 20:25:19 +00:00
+								    # MySQL
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								    try:
 								        with connections["mysql"].cursor() as cursor:
 								            cursor.execute("SELECT version();")
 								            versions["MySQL"] = cursor.fetchone()[0].split("-")[0]
 								    except django.db.utils.OperationalError:
 								        raise django.db.utils.OperationalError(
 								            "You need a MySQL DB called \"cachalot\" first. "
 								            "Login with \"mysql -u root\" and run: CREATE DATABASE cachalot;"
 								        )
-												Adds benchmark conditions.

											
										
										
											2015-02-16 20:25:19 +00:00
+								    # Redis
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								    out = force_text(check_output(["redis-cli", "INFO", "server"])).replace("\r", "")
 								    versions["Redis"] = re.search(
 								        r"^redis_version:([\d\.]+)$", out, flags=re.MULTILINE
 								    ).group(1)
-												Adds benchmark conditions.

											
										
										
											2015-02-16 20:25:19 +00:00
+								    # memcached
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								    out = force_text(check_output(["memcached", "-h"]))
 								    versions["memcached"] = re.match(
 								        r"^memcached ([\d\.]+)$", out, flags=re.MULTILINE
 								    ).group(1)
 								    versions.update(
 								        (
 								            ("psycopg2", psycopg2.__version__.split()[0]),
 								            ("mysqlclient", _mysql.__version__),
 								        )
 								    )
 								    with io.open(os.path.join(RESULTS_PATH, "conditions.rst"), "w") as f:
 								        f.write(
 								            "In this benchmark, a small database is generated, "
 								            "and each test is executed %s times "
 								            "under the following conditions:\n\n" % Benchmark.n
 								        )
 								        def write_table_sep(char="="):
 								            f.write((char * 20) + " " + (char * 50) + "\n")
-												Adds benchmark conditions.

											
										
										
											2015-02-16 20:25:19 +00:00
+								        write_table_sep()
 								        for k, v in versions.items():
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								            f.write(k.ljust(20) + " " + v + "\n")
-												Adds benchmark conditions.

											
										
										
											2015-02-16 20:25:19 +00:00
+								        write_table_sep()
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								class AssertNumQueries(CaptureQueriesContext):
 								    def __init__(self, n, using=None):
 								        self.n = n
 								        self.using = using
 								        super(AssertNumQueries, self).__init__(self.get_connection())
 								    def get_connection(self):
 								        if self.using is None:
 								            return connection
 								        return connections[self.using]
 								    def __exit__(self, exc_type, exc_val, exc_tb):
 								        super(AssertNumQueries, self).__exit__(exc_type, exc_val, exc_tb)
 								        if len(self) != self.n:
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								            print(
 								                "The amount of queries should be %s, but %s were captured."
 								                % (self.n, len(self))
 								            )
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
 								class Benchmark(object):
-												Adds an introduction to the benchmark.

											
										
										
											2015-02-22 04:00:48 +00:00
+								    n = 20
 								    def __init__(self):
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								        self.data = []
-												Fixes an invalidation order issue in the benchmark.

											
										
										
											2014-11-23 22:11:51 +00:00
+								    def bench_once(self, context, num_queries, invalidate_before=False):
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								        for _ in range(self.n):
-												Fixes an invalidation order issue in the benchmark.

											
										
										
											2014-11-23 22:11:51 +00:00
+								            if invalidate_before:
-												Merges invalidate_all, invalidate_tables and invalidate_models, while optimising it.

											
										
										
											2015-10-05 20:31:47 +00:00
+								                invalidate(db_alias=self.db_alias)
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								            with AssertNumQueries(num_queries, using=self.db_alias):
 								                start = time()
 								                self.query_function(self.db_alias)
 								                end = time()
 								            self.data.append(
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								                {
 								                    "query": self.query_name,
 								                    "time": end - start,
 								                    "context": context,
 								                    "db": self.db_vendor,
 								                    "cache": self.cache_name,
 								                }
 								            )
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
 								    def benchmark(self, query_str, to_list=True, num_queries=1):
-												Clears caches during benchmark to ensure the same conditions for each test.

											
										
										
											2017-06-04 16:38:13 +00:00
+								        # Clears the cache before a single benchmark to ensure the same
 								        # conditions across single benchmarks.
 								        caches[settings.CACHALOT_CACHE].clear()
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								        self.query_name = query_str
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        query_str = "Test.objects.using(using)" + query_str
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								        if to_list:
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								            query_str = "list(%s)" % query_str
 								        self.query_function = eval("lambda using: " + query_str)
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
-												Simplifies cachalot_settings and forbids its modification.

											
										
										
											2014-12-08 18:43:43 +00:00
+								        with override_settings(CACHALOT_ENABLED=False):
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								            self.bench_once(CONTEXTS[0], num_queries)
-												Fixes an invalidation order issue in the benchmark.

											
										
										
											2014-11-23 22:11:51 +00:00
+								        self.bench_once(CONTEXTS[1], num_queries, invalidate_before=True)
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
 								        self.bench_once(CONTEXTS[2], 0)
 								    def execute_benchmark(self):
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        self.benchmark(".count()", to_list=False)
 								        self.benchmark(".first()", to_list=False)
 								        self.benchmark("[:10]")
 								        self.benchmark("[5000:5010]")
-												Improves the benchmark.

											
										
										
											2014-12-13 19:05:17 +00:00
+								        self.benchmark(".filter(name__icontains='e')[0:10]")
 								        self.benchmark(".filter(name__icontains='e')[5000:5010]")
 								        self.benchmark(".order_by('owner')[0:10]")
 								        self.benchmark(".order_by('owner')[5000:5010]")
 								        self.benchmark(".select_related('owner')[0:10]")
 								        self.benchmark(".select_related('owner')[5000:5010]")
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        self.benchmark(".prefetch_related('owner__groups')[0:10]", num_queries=3)
 								        self.benchmark(".prefetch_related('owner__groups')[5000:5010]", num_queries=3)
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
 								    def run(self):
 								        for db_alias in settings.DATABASES:
 								            self.db_alias = db_alias
 								            self.db_vendor = connections[self.db_alias].vendor
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								            print("Benchmarking %s…" % self.db_vendor)
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								            for cache_alias in settings.CACHES:
-												Fixes a removed function in Django 1.9.

											
										
										
											2015-12-18 11:37:21 +00:00
+								                cache = caches[cache_alias]
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								                self.cache_name = cache.__class__.__name__[:-5].lower()
-												Simplifies cachalot_settings and forbids its modification.

											
										
										
											2014-12-08 18:43:43 +00:00
+								                with override_settings(CACHALOT_CACHE=cache_alias):
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								                    self.execute_benchmark()
 								        self.df = pd.DataFrame.from_records(self.data)
 								        if not os.path.exists(RESULTS_PATH):
 								            os.mkdir(RESULTS_PATH)
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        self.df.to_csv(os.path.join(RESULTS_PATH, "data.csv"))
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        self.xlim = (0, self.df["time"].max() * 1.01)
 								        self.output("db")
 								        self.output("cache")
-												Generates benchmark plots about caches too.

											
										
										
											2014-11-23 23:13:29 +00:00
 								    def output(self, param):
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        gp = self.df.groupby(["context", "query", param])["time"]
-												Improves benchmark plots.

											
										
										
											2014-11-24 11:04:01 +00:00
+								        self.means = gp.mean().unstack().unstack().reindex(CONTEXTS)
 								        los = self.means - gp.min().unstack().unstack().reindex(CONTEXTS)
 								        ups = gp.max().unstack().unstack().reindex(CONTEXTS) - self.means
 								        self.errors = dict(
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								            (
 								                key,
 								                dict(
 								                    (
 								                        subkey,
 								                        [
 								                            [los[key][subkey][context] for context in self.means.index],
 								                            [ups[key][subkey][context] for context in self.means.index],
 								                        ],
 								                    )
 								                    for subkey in self.means.columns.levels[1]
 								                ),
 								            )
 								            for key in self.means.columns.levels[0]
 								        )
-												Adds benchmark results to the docs.

											
										
										
											2014-12-14 09:46:52 +00:00
+								        self.get_perfs(param)
-												Improves benchmark plots.

											
										
										
											2014-11-24 11:04:01 +00:00
+								        self.plot_detail(param)
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        gp = self.df.groupby(["context", param])["time"]
-												Improves benchmark plots.

											
										
										
											2014-11-24 11:04:01 +00:00
+								        self.means = gp.mean().unstack().reindex(CONTEXTS)
 								        los = self.means - gp.min().unstack().reindex(CONTEXTS)
 								        ups = gp.max().unstack().reindex(CONTEXTS) - self.means
 								        self.errors = [
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								            [
 								                [los[key][context] for context in self.means.index],
 								                [ups[key][context] for context in self.means.index],
 								            ]
 								            for key in self.means
 								        ]
-												Improves benchmark plots.

											
										
										
											2014-11-24 11:04:01 +00:00
+								        self.plot_general(param)
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
-												Adds benchmark results to the docs.

											
										
										
											2014-12-14 09:46:52 +00:00
+								    def get_perfs(self, param):
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        with io.open(os.path.join(RESULTS_PATH, param + "_results.rst"), "w") as f:
-												Adds benchmark results to the docs.

											
										
										
											2014-12-14 09:46:52 +00:00
+								            for v in self.means.columns.levels[0]:
 								                g = self.means[v].mean(axis=1)
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								                perf = "%s is %.1f× slower then %.1f× faster" % (
 								                    v.ljust(10),
 								                    g[CONTEXTS[1]] / g[CONTEXTS[0]],
 								                    g[CONTEXTS[0]] / g[CONTEXTS[2]],
 								                )
-												Adds benchmark results to the docs.

											
										
										
											2014-12-14 09:46:52 +00:00
+								                print(perf)
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								                f.write("- %s\n" % perf)
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
-												Improves benchmark plots.

											
										
										
											2014-11-24 11:04:01 +00:00
+								    def plot_detail(self, param):
-												Generates benchmark plots about caches too.

											
										
										
											2014-11-23 23:13:29 +00:00
+								        for v in self.means.columns.levels[0]:
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								            plt.figure()
-												Improves benchmark plots.

											
										
										
											2014-11-24 11:04:01 +00:00
+								            axes = self.means[v].plot(
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								                kind="barh",
 								                xerr=self.errors[v],
 								                xlim=self.xlim,
 								                figsize=(15, 15),
 								                subplots=True,
 								                layout=(6, 2),
 								                sharey=True,
 								                legend=False,
 								            )
-												Adds the disk model name to the benchmark conditions.

											
										
										
											2016-01-11 20:06:22 +00:00
+								            plt.gca().invert_yaxis()
-												Improves benchmark plots.

											
										
										
											2014-11-24 11:04:01 +00:00
+								            for row in axes:
 								                for ax in row:
-												Generates a grid on the benchmark results.

											
										
										
											2016-01-10 20:56:48 +00:00
+								                    ax.xaxis.grid(True)
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								                    ax.set_ylabel("")
 								                    ax.set_xlabel("Time (s)")
 								            plt.savefig(os.path.join(RESULTS_PATH, "%s_%s.svg" % (param, v)))
-												Improves benchmark plots.

											
										
										
											2014-11-24 11:04:01 +00:00
 								    def plot_general(self, param):
 								        plt.figure()
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        ax = self.means.plot(kind="barh", xerr=self.errors, xlim=self.xlim)
-												Generates a grid on the benchmark results.

											
										
										
											2016-01-10 20:56:48 +00:00
+								        ax.invert_yaxis()
 								        ax.xaxis.grid(True)
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        ax.set_ylabel("")
 								        ax.set_xlabel("Time (s)")
 								        plt.savefig(os.path.join(RESULTS_PATH, "%s.svg" % param))
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
 								def create_data(using):
 								    User.objects.using(using).bulk_create(
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        [User(username="user%d" % i) for i in range(50)]
 								    )
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								    Group.objects.using(using).bulk_create(
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        [Group(name="test%d" % i) for i in range(10)]
 								    )
-												Improves benchmark plots.

											
										
										
											2014-11-24 11:04:01 +00:00
+								    groups = list(Group.objects.using(using))
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								    for u in User.objects.using(using):
-												Improves benchmark plots.

											
										
										
											2014-11-24 11:04:01 +00:00
+								        u.groups.add(choice(groups), choice(groups))
 								    users = list(User.objects.using(using))
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
+								    Test.objects.using(using).bulk_create(
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
+								        [Test(name="test%d" % i, owner=choice(users)) for i in range(10000)]
 								    )
-												Adds a benchmark.

											
										
										
											2014-11-23 19:09:42 +00:00
-												Support benchmarks for MacOS
* Added how to run benchmarks in docs and README

											
										
										
											2020-07-09 22:33:05 +00:00
if __name__ == "__main__":
    # RESULTS_PATH is a nested, dated directory: create missing parents too
    # and tolerate it already existing.
    os.makedirs(RESULTS_PATH, exist_ok=True)

    write_conditions()

    # Maps each alias whose test database was created to its original
    # database name, which destroy_test_db() needs on teardown.
    old_db_names = {}
    try:
        for alias in connections:
            conn = connections[alias]
            # Read the name before creating the test database and record it
            # only once creation succeeded, so teardown never targets a
            # database that was not created.
            original_name = conn.settings_dict["NAME"]
            conn.creation.create_test_db(autoclobber=True)
            old_db_names[alias] = original_name

            print("Populating %s…" % conn.vendor)
            create_data(alias)

        Benchmark().run()
    finally:
        # Always drop the test databases, even if populating or benchmarking
        # raised, so failed runs do not leave them behind.
        for alias, original_name in old_db_names.items():
            connections[alias].creation.destroy_test_db(original_name)