我构建了一个测试,比较偏移量、光标和行编号()。我对row_number()的印象是正确的,即无论结果集在哪里,它的速度都是一致的。然而,这个速度比偏移或光标都要慢得多,正如我的印象一样,速度几乎是一样的,都会在速度上降低,直到结果结束。
结果:
offset(100,100): 0.016359
scroll(100,100): 0.018393
rownum(100,100): 15.535614
offset(100,480000): 1.761800
scroll(100,480000): 1.781913
rownum(100,480000): 15.158601
offset(100,999900): 3.670898
scroll(100,999900): 3.664517
rownum(100,999900): 14.581068
from sqlalchemy import *
metadata = MetaData()
engine = create_engine('postgresql://scott:tiger@localhost/test', echo=True)
t1 = Table('t1', metadata,
Column('id', Integer, primary_key=True),
Column('d1', String(50)),
Column('d2', String(50)),
Column('d3', String(50)),
Column('d4', String(50)),
Column('d5', String(50))
)
if not engine.has_table('t1'):
conn = engine.connect()
t1.create(conn)
# 1000000 rows
for i in range(100):
conn.execute(t1.insert(), [
dict(
('d%d' % col, "data data data %d %d" % (col, (i * 10000) + j))
for col in range(1, 6)
) for j in xrange(1, 10001)
])
import time
def timeit(fn, count, *args):
now = time.time()
for i in xrange(count):
fn(*args)
total = time.time() - now
print "%s(%s): %f" % (fn.__name__, ",".join(repr(x) for x in args), total)
# this is a raw psycopg2 connection.
conn = engine.raw_connection()
def offset(limit, offset):
cursor = conn.cursor()
cursor.execute("select * from t1 order by id limit %d offset %d" % (limit, offset))
cursor.fetchall()
cursor.close()
def rownum(limit, offset):
cursor = conn.cursor()
cursor.execute("select * from (select *, "
"row_number() over (order by id asc) as rownum from t1) as foo "
"where rownum>=%d and rownum<%d" % (offset, limit + offset))
cursor.fetchall()
cursor.close()
def scroll(limit, offset):
cursor = conn.cursor('foo')
cursor.execute("select * from t1 order by id")
cursor.scroll(offset)
cursor.fetchmany(limit)
cursor.close()
print
timeit(offset, 10, 100, 100)
timeit(scroll, 10, 100, 100)
timeit(rownum, 10, 100, 100)
print
timeit(offset, 10, 100, 480000)
timeit(scroll, 10, 100, 480000)
timeit(rownum, 10, 100, 480000)
print
timeit(offset, 10, 100, 999900)
timeit(scroll, 10, 100, 999900)
timeit(rownum, 10, 100, 999900)