fix json stream decode performance issue

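String slicing is expensive because Python copies the (immutable) string on every slice, so re-slicing the remaining input after each decoded object copies it again each time.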
This commit is contained in:
Stefan Bühler 2023-10-02 09:18:25 +02:00
parent 7f7841f2bf
commit b8d9c6f2a1

21
pqm
View File

@ -184,6 +184,21 @@ class QueueName(enum.Enum):
ALL_QUEUE_NAMES: set[QueueName] = set(QueueName) ALL_QUEUE_NAMES: set[QueueName] = set(QueueName)
def json_decode_stream(data: str):
    """Lazily decode a sequence of concatenated JSON documents.

    ``data`` may hold any number of JSON documents, optionally separated
    and surrounded by whitespace (as judged by ``str.isspace``).  The
    input is walked with an index and never sliced, so no copy of the
    remaining text is made between documents.

    Args:
        data: Text containing zero or more JSON documents.

    Yields:
        Each decoded document, in input order.

    Raises:
        json.JSONDecodeError: Propagated from ``raw_decode`` when the
            text at a non-whitespace position is not valid JSON.  As
            this is a generator, the error surfaces while iterating,
            after earlier documents have already been yielded.
    """
    decoder = json.JSONDecoder()
    total = len(data)
    pos = 0
    while pos < total:
        # raw_decode() is handed the exact start of a document, so
        # leading, separating and trailing whitespace is stepped over
        # here; running off the end of the input ends the generator.
        if data[pos].isspace():
            pos += 1
            continue
        # raw_decode() returns the index just past the document, which
        # becomes the starting point for the next iteration.
        obj, pos = decoder.raw_decode(data, pos)
        yield obj
@dataclasses.dataclass @dataclasses.dataclass
class Mail: class Mail:
"""Metadata for mail in postfix queue""" """Metadata for mail in postfix queue"""
@ -209,11 +224,7 @@ class Mail:
@staticmethod @staticmethod
def read_postqueue_json(data: str, id_prefix: str = '') -> list[Mail]: def read_postqueue_json(data: str, id_prefix: str = '') -> list[Mail]:
queue = [] queue = []
decoder = json.JSONDecoder() for obj in json_decode_stream(data):
data = data.strip()
while data:
obj, end = decoder.raw_decode(data, 0)
data = data[end:].lstrip()
mail = Mail.from_json(obj) mail = Mail.from_json(obj)
mail.queue_id = id_prefix + mail.queue_id mail.queue_id = id_prefix + mail.queue_id
queue.append(mail) queue.append(mail)