I have created a method to overwrite bulk_create for certain queries, so that I get an pk returned from my MYSQL database.
def dict_fetch_all(cursor):
"""Return all rows from a cursor as a dict"""
columns = [col[0] for col in cursor.description]
return [
dict(zip(columns, row))
for row in cursor.fetchall()
]
class BulkQueryManager(models.Manager):
def bulk_create_return_with_id(self, objs, batch_size=2000):
self._for_write = True
fields = [f for f in self.model._meta.concrete_fields if not isinstance(f, AutoField)]
created_objs = []
with transaction.atomic(using=self.db):
with connections[self.db].cursor() as cursor:
for item in [objs[i:i + batch_size] for i in range(0, len(objs), batch_size)]:
query = sql.InsertQuery(self.model)
query.insert_values(fields, item)
for raw_sql, params in query.get_compiler(using=self.db).as_sql():
cursor.execute(raw_sql, params)
print('last row id: ', cursor.lastrowid, 'count: ', cursor.rowcount)
raw = "SELECT * FROM %s WHERE id >= %s ORDER BY id DESC LIMIT %s" % (
self.model._meta.db_table, cursor.lastrowid, cursor.rowcount
)
print(raw)
cursor.execute(raw)
created_objs.extend(dict_fetch_all(cursor))
print(created_objs)
post_save.send(item.__class__, instance=item, created=True)
return created_objs
However, created_objs isn't always the same as my input objs. Very rarely the wrong objects get returned. My cursor.lastrowid is always correct, but it seems like the first few rows sometimes get skipped.
For example:
print output:
last row id: 155407411 count: 22
print output:
SELECT * FROM product_research_offer WHERE id >= 155407411 ORDER BY id DESC LIMIT 22
simplified print output:
[{'id': 155407434}, {'id': 155407433}, {'id': 155407432}, {'id': 155407431}, {'id': 155407430}, {'id': 155407429}, {'id': 155407428}, {'id': 155407427}, {'id': 155407426}, {'id': 155407425}, {'id': 155407424}, {'id': 155407423}, {'id': 155407422}, {'id': 155407421}, {'id': 155407420}, {'id': 155407419}, {'id': 155407418}, {'id': 155407417}, {'id': 155407416}, {'id': 155407415}, {'id': 155407414}, {'id': 155407413}]
As you can see the returned 'id' starts from 155407413 instead of 155407411. While 155407411 is the correct id and the limit of 22 is correct. How can I make sure that the correct rows are returned or that rows aren't skipped?