Source code for pymemtrace.util.dtrace_log_analyse

"""
This attempts to reconcile all the memory and free allocations reported by DTrace.

For example given the DTrace log like this:

.. code-block:: text

    dtrace:::BEGIN
     77633  cmn_cmd_opts.py:141  -> set_log_level malloc(560) pntr 0x7fca83ef4240
     77633      __init__.py:422  -> validate malloc(1114) pntr 0x7fca858e4200
     77633      __init__.py:422  -> validate free(0x7fca858e4200)
     77633     threading.py:817  -> __init__ malloc(576) pntr 0x7fca83ef4470
     77633      __init__.py:471  -> _init malloc(576) pntr 0x7fca83ef46b0
     77633     threading.py:870  -> start malloc(264) pntr 0x7fca83ef21e0
     77633     threading.py:870  -> start malloc(16) pntr 0x7fca83d48a30
     77633     threading.py:574  -> wait malloc(536) pntr 0x7fca83d54be0
     77633     threading.py:364  -> notify malloc(528) pntr 0x7fca83c47e80
     77633       process.py:268  -> _get_process_data free(0x0)
     77633       process.py:268  -> _get_process_data malloc(4096) pntr 0x7fca8408d200
     77633       process.py:268  -> _get_process_data malloc(25) pntr 0x7fca83c00620
     77633       process.py:268  -> _get_process_data malloc(4096) pntr 0x7fca8408e200
     77633       process.py:268  -> _get_process_data free(0x0)
     77633       process.py:268  -> _get_process_data free(0x0)
     77633       process.py:268  -> _get_process_data free(0x0)
     77633       process.py:268  -> _get_process_data free(0x0)
     77633       process.py:268  -> _get_process_data free(0x0)
     77633       process.py:268  -> _get_process_data free(0x0)
     77633       process.py:268  -> _get_process_data free(0x0)
     77633       process.py:268  -> _get_process_data free(0x7fca83c00620)
     77633       process.py:268  -> _get_process_data free(0x7fca8408e200)
     77633       process.py:268  -> _get_process_data free(0x7fca8408d200)
    dtrace::END

This produces the following summary output:

.. code-block:: text

    2025-08-26 13:35:50,956 - dtrace_log_analyse.py#201 - INFO     - Ending DTrace block at line 16261386
    DTraceMallocs:
    malloc count outstanding:      926,784
     Total bytes outstanding:  122,456,515
       Max bytes outstanding:  220,487,009
        Total bytes malloc'd:  837,846,488
          Total bytes free'd:  715,389,973
              Maximum malloc:      589,880
             Max allocations:    2,314,384
               Count mallocs:    7,197,954
        Count frees non-null:    6,271,170
            Count frees null:       39,119
    Total lines: 16,261,304 Failed parse lines: 36,526 Failed malloc: 931,407 Failed free: 1,785,128
    Process time: 602.995 (s)
    (pymemtrace_3.12_A)

"""
import argparse
import dataclasses
import io
import logging
import re
import sys
import time
import typing

logger = logging.getLogger(__file__)

# Matches ' 77633  cmn_cmd_opts.py:141  -> set_log_level malloc(560) pntr 0x7fca83ef4240'
# To: ('77633', 'cmn_cmd_opts.py', '141', 'set_log_level', '560', '0x7fca83ef4240')
RE_MALLOC = re.compile(r'^\s+(\d+)\s+([^:]+):(\d+)\s+->\s+(\S.*\S)?\s+malloc\((\d+)\)\s+pntr\s+(0x[0-9a-f]+)$')

# Given a line:
# " 77633 rs/paulross/PycharmProjects/TotalDepth/src/TotalDepth/common/np_summary.py:100  -> y malloc(15424) pntr 0x7fca8410fe00\n"
# split('->')
# Then apply RE_MALLOC_LEFT and RE_MALLOC_RIGHT
RE_MALLOC_LEFT = re.compile(r'^\s+(\d+)\s+([^:]+):(\d+)\s+$')
RE_MALLOC_RIGHT = re.compile(r'^\s+(\S+)?\s+malloc\((\d+)\)\s+pntr\s+(0x[0-9a-f]+)$')

# Matches '... malloc(560) pntr 0x7fca83ef4240'
# To: ('... ', '560', '0x7fca83ef4240')
RE_MALLOC_LAST_RESORT = re.compile(r'^(.+)malloc\((\d+)\)\s+pntr\s+(0x[0-9a-f]+)$')
# Matches ' 77633       process.py:268  -> _get_process_data free(0x7fca83c00620)'
# To: ('77633', 'process.py', '268', '_get_process_data', '0x7fca83c00620')
RE_FREE = re.compile(r'^\s+(\d+)\s+([^:]+)?:(\d+)\s+->\s+(\S.*\S)?\s+free\((0x[0-9a-f]+)\)$')
# Matches ' 77633  anything... free(0x7fca83c00620)'
# To: ('77633', 'anything... ', '0x7fca83c00620')
RE_FREE_FALLBACK_A = re.compile(r'^\s+(\d+)(.+)free\((0x[0-9a-f]+)\)$')
# Matches '... free(0x7fca83c00620)'
# To: ('... ', '0x7fca83c00620')
RE_FREE_LAST_RESORT = re.compile(r'^(.+)free\((0x[0-9a-f]+)\)$')


[docs] @dataclasses.dataclass class DTraceMallocCall: log_line: int process: int file: str line: int function: typing.Optional[str] size: int pointer: int
[docs] @dataclasses.dataclass class DTraceFreeCall: log_line_num: int process: int file: str line: int function: str pointer: int
class DTraceMallocs: def __init__(self): self.malloc_dict: typing.Dict[int, DTraceMallocCall] = {} self.count_mallocs = 0 self.count_frees_non_null = 0 self.count_frees_null = 0 self.count_lines = 0 self.total_bytes_malloced = 0 self.total_bytes_freed = 0 self.total_bytes_outstanding = 0 self.max_malloc = 0 self.max_allocations = 0 self.max_bytes_outstanding = 0 self.failed_to_add_malloc = 0 self.failed_to_add_free = 0 self.failed_to_parse_line = 0 def __len__(self) -> int: return len(self.malloc_dict) def add_malloc(self, malloc_call: DTraceMallocCall): if malloc_call.pointer in self.malloc_dict: was = self.malloc_dict[malloc_call.pointer] # raise ValueError( # f'Malloc call at 0x{malloc_call.pointer:012x} already in dict' # f' Was log line: {was.log_line} now {malloc_call.log_line}' # ) logger.error( f'Malloc call at 0x{malloc_call.pointer:012x} already in dict' f' Was log line: {was.log_line} now {malloc_call.log_line}' ) self.failed_to_add_malloc += 1 else: self.malloc_dict[malloc_call.pointer] = malloc_call self.count_mallocs += 1 self.total_bytes_malloced += malloc_call.size self.total_bytes_outstanding += malloc_call.size self.max_allocations = max(self.max_allocations, len(self)) self.max_bytes_outstanding = max(self.max_bytes_outstanding, self.total_bytes_outstanding) self.max_malloc = max(self.max_malloc, malloc_call.size) self.count_lines += 1 def add_free(self, free_call: DTraceFreeCall): if free_call.pointer != 0: if free_call.pointer not in self.malloc_dict: # raise ValueError(f'Free call at 0x{free_call.pointer:012x} not in dict') logger.error( f'Free call at 0x{free_call.pointer:012x} line {free_call.log_line_num} not in dict' ) self.failed_to_add_free += 1 else: size_free = self.malloc_dict[free_call.pointer].size self.total_bytes_freed += size_free self.total_bytes_outstanding -= size_free del self.malloc_dict[free_call.pointer] self.count_frees_non_null += 1 else: self.count_frees_null += 1 self.count_lines += 1 def str_failures(self) -> str: return ( f'Total lines: {(self.count_lines + self.failed_to_parse_line):,d}' f' Failed parse lines: {self.failed_to_parse_line:,d}' f' Failed malloc: {self.failed_to_add_malloc:,d}' f' Failed free: {self.failed_to_add_free:,d}' ) def match_malloc_line(line: str, log_line: int) -> typing.Optional[DTraceMallocCall]: m = RE_MALLOC.match(line) if m is not None: malloc_call = DTraceMallocCall( log_line, int(m.group(1)), m.group(2), int(m.group(3)), m.group(4), int(m.group(5)), int(m.group(6), base=16) ) return malloc_call else: # Fall back on a split technique that matches: # " 77633 rs/paulross/PycharmProjects/TotalDepth/src/TotalDepth/common/np_summary.py:100 -> y malloc(15424) pntr 0x7fca8410fe00\n" split = line.split('->') if len(split) == 2: m_left = RE_MALLOC_LEFT.match(split[0]) m_right = RE_MALLOC_RIGHT.match(split[1]) if m_left and m_right: malloc_call = DTraceMallocCall( log_line, int(m_left.group(1)), m_left.group(2), int(m_left.group(3)), m_right.group(1), int(m_right.group(2)), int(m_right.group(3), base=16) ) return malloc_call else: m = RE_MALLOC_LAST_RESORT.match(line) if m is not None: malloc_call = DTraceMallocCall( log_line, -1, m.group(1), -1, '', int(m.group(2)), int(m.group(3), base=16) ) return malloc_call def match_free_line(line: str, log_line: int) -> typing.Optional[DTraceFreeCall]: m = RE_FREE.match(line) if m is not None: free_call = DTraceFreeCall( log_line, int(m.group(1)), m.group(2), int(m.group(3)), m.group(4), int(m.group(5), base=16) ) return free_call else: m = RE_FREE_FALLBACK_A.match(line) if m is not None: free_call = DTraceFreeCall( log_line, int(m.group(1)), "", -1, m.group(2), int(m.group(3), base=16) ) return free_call else: m = RE_FREE_LAST_RESORT.match(line) if m is not None: free_call = DTraceFreeCall( log_line, -1, m.group(1), -1, "", int(m.group(2), base=16) ) return free_call def read_dtrace_log_file(log_file: io.StringIO) -> DTraceMallocs: in_dtrace_block = False log_line_num = 1 malloc_dict = DTraceMallocs() for line in log_file.readlines(): if log_line_num % 1000 == 0: logger.info('Reading line %d', log_line_num) if line != '\n': if line == 'dtrace:::BEGIN\n': in_dtrace_block = True logger.info('Starting DTrace block at line %d', log_line_num) elif line == 'dtrace:::END\n': in_dtrace_block = False logger.info('Ending DTrace block at line %d', log_line_num) break elif in_dtrace_block: malloc_call = match_malloc_line(line, log_line_num) if malloc_call is not None: malloc_dict.add_malloc(malloc_call) else: free_call = match_free_line(line, log_line_num) if free_call is not None: malloc_dict.add_free(free_call) else: logger.error(f'Can not parse line [{log_line_num}]: "{line}"') malloc_dict.failed_to_parse_line += 1 log_line_num += 1 return malloc_dict def read_dtrace_log(log_path: str) -> DTraceMallocs: with open(log_path, encoding='latin-1') as log_file: return read_dtrace_log_file(log_file) def pprint_mallocs(dtm: DTraceMallocs, st=sys.stdout) -> None: st.write('DTraceMallocs:\n') st.write(f'malloc count outstanding: {len(dtm):12,d}\n') st.write(f' Total bytes outstanding: {dtm.total_bytes_outstanding:12,d}\n') st.write(f' Max bytes outstanding: {dtm.max_bytes_outstanding:12,d}\n') st.write(f" Total bytes malloc'd: {dtm.total_bytes_malloced:12,d}\n") st.write(f" Total bytes free'd: {dtm.total_bytes_freed:12,d}\n") st.write(f" Maximum malloc: {dtm.max_malloc:12,d}\n") st.write(f' Max allocations: {dtm.max_allocations:12,d}\n') st.write(f' Count mallocs: {dtm.count_mallocs:12,d}\n') st.write(f' Count frees non-null: {dtm.count_frees_non_null:12,d}\n') st.write(f' Count frees null: {dtm.count_frees_null:12,d}\n') st.write(dtm.str_failures() + '\n') def main() -> int: parser = argparse.ArgumentParser( prog='dtrace_log_analyse.py', description="""Reads an DTrace log of a process and analyses it.""", ) parser.add_argument("-l", "--log_level", type=int, dest="log_level", default=20, help="Log Level (debug=10, info=20, warning=30, error=40, critical=50)" " [default: %(default)s]" ) parser.add_argument('path_in', type=str, help='Input path to the DTrace log.', nargs='?') args = parser.parse_args() logging.basicConfig( level=args.log_level, # format='%(asctime)s - %(filename)s#%(lineno)d - %(process)5d - (%(threadName)-10s) - %(levelname)-8s - %(message)s', format='%(asctime)s - %(filename)s#%(lineno)d - %(levelname)-8s - %(message)s', stream=sys.stdout, ) time_start = time.perf_counter() result = read_dtrace_log(args.path_in) pprint_mallocs(result) print(f'Process time: {time.perf_counter() - time_start:.3f} (s)') return 0 if __name__ == '__main__': exit(main())