
    gg}                         S SK r S SKJrJrJr  \" SS5      \" SS5      /r\" SS\R                  SS	5      \" S
5      \" SS\R                  S 5      /r\" SS\R                  S S	5      \" SS\R                  SS	5      /r\" SS\R                  S S	5      \" SS\R                  SS	5      /r	S r
S rS rS rS rS rS rg)    N)
ImportanceMetricRequestRequestedMetricsParser*device__attribute_compute_capability_majorcc_major*device__attribute_compute_capability_minorcc_minorzlts__t_sectors.suminput_sectorsF#l1tex__m_l1tex2xbar_write_bytes.sumz2dram__bytes_read.sum.pct_of_peak_sustained_elapseddram_read_bandwidthzlts__gcomp_input_sectors.sumcompression_input_sectorsz0lts__average_gcomp_input_sector_success_rate.pctcompression_success_rated   zlrc__ilc_input_sectors.sumz.lrc__average_ilc_input_sector_success_rate.pctc                      g)NMemoryL2Compression r       7nsight-compute-2025.1.1/sections/MemoryL2Compression.pyget_identifierr   Q   s     r   c                      g)NzMemory L2 Compressionr   r   r   r   get_namer   T   s    "r   c                      g)Nz;Detection of inefficient use of L2 Compute Data Compressionr   r   r   r   get_descriptionr   W   s    Hr   c                      g)NMemoryWorkloadAnalysis_Chartr   r   r   r   get_section_identifierr   Z   s    )r   c                      S/$ )NMemoryr   r   r   r   get_parent_rules_identifiersr   ]   s
    :r   c                 6   U S   R                  5       nU S   R                  5       nU S   R                  5       nU S   R                  5       nUS:X  a  [        R                  R                  S4$ SU-
  SX-  -
  -  U-  n[        R                  R                  U4$ )Nr   r
   r   r   r      )valueNvRules	IFrontendSpeedupType_LOCALSpeedupType_GLOBAL)metricsr   r
   r   dram_bandwidth_percentimprovement_percents         r   get_estimated_speedupr*   `   s     '(C D J J LO,224M&'ABHHJ$%:;AAC  22A55 
%	%(88	:
 	! 
 //1DDDr   c           	      z   [         R                  " U 5      nUR                  S5      R                  S5      nUR	                  5       n[        X5      R                  [        5      nUS   R                  5       S-  US   R                  5       -   nUS:  a  g [        nUS:  a  [        nU[        -  n[        X5      R                  U5      nUS   nUc  g Sn	US	   R                  5       n
U
S:  Ga  S
U
-  nUS   R                  5       nUR                  5       nX:  Ga:  SR                  X5      nUR                  [         R                  R                  US5      n[!        U5      u  nnUR#                  UUU5        UR%                  XS   R'                  5       U[         R                  R(                  S5        US:  aL  UR%                  XS	   R'                  5       U
[         R                  R(                  SR                  U5      5        UR%                  XS   R'                  5       US   R                  5       [         R                  R*                  S5        US   R                  5       nUS:  GaI  SU-  U:  Ga>  SR                  UU-  5      nUR                  [         R                  R                  US5      n[!        U5      u  nnUR#                  UUU5        UR%                  XS   R'                  5       U[         R                  R(                  S5        US:  aL  UR%                  XS	   R'                  5       U
[         R                  R(                  SR                  U5      5        UR%                  XS   R'                  5       US   R                  5       [         R                  R*                  S5        g g g g )Nr   r   
   r	   P   Z   r
      r   g      @@r   a	  Out of the {:.1f} bytes sent to the L2 Compression unit only {:.2f}% were successfully compressed. To increase this success rate, consider marking only those memory regions as compressible that contain the most zero values and/or expose the most homogeneous values.zLow Compression Ratez%Increase the compression success ratezPIncrease the number of compressed sectors towards all L2 input sectors ({:,.0f})r   zOThe higher the DRAM peak read utilization the more can be gained by compressionr   g      @a  The access patterns for writes to compressible memory are not well suited for the L2 Compression unit. As a consequence, {:.1f}x the data written to the L2 cache has to be communicated to the L2 Compression unit. Try maximizing local coherence for the write operations to compressible memory. For example, avoid writes with large strides as they lead to partial accesses to many L2 cache lines. Instead, try accessing fewer overall cache lines by modifying many values per cache line with each warp's execution of a write operation.zAccess Pattern)r#   get_contextrange_by_idxaction_by_idxfrontendr   parserequested_metrics_baser"   %requested_metrics_compression_generic#requested_metrics_compression_gh100requested_metricsformatmessager$   MsgType_MSG_OPTIMIZATIONr*   speedupfocus_metricnameSeverity_SEVERITY_HIGHSeverity_SEVERITY_LOW)handlectxactionfe
cc_metricsccrequested_metrics_for_chipr'   input_sectors_metriclow_compression_thresholdr   compression_input_bytesr   r
   r:   msg_idspeedup_typespeedup_valuel1tex_write_bytess                      r   applyrO   q   s   


f
%Ca ..q1F	B'7==>TUJ	J		%	%	'"	,z*/E/K/K/M	MB	Bw!F	Rx%H""33$V4::;UVG"?3# " '(C D J J L 1$"&)B"B#*+E#F#L#L#N ,224 $? b  i  i  jA  \GZZ 1 1 J JGUklF*?*H'L-JJv|];OOF,F$G$L$L$NPhjqj{j{  kS  kS  U|  }q 0K(L(Q(Q(SUnpw  qB  qB  qY  qYfmmn{|~OOF,A$B$G$G$I7ShKiKoKoKqsz  tE  tE  t[  t[ac $$IJPPR!.?(?BY(Y m  t  t  uL	  M	^	  u^	  _	GZZ 1 1 J JGUefF*?*H'L-JJv|];OOF,F$G$L$L$NPhjqj{j{  kS  kS  U|  }q 0K(L(Q(Q(SUnpw  qB  qB  qY  qYfmmn{|~OOF,A$B$G$G$I7ShKiKoKoKqsz  tE  tE  t[  t[ac )Z!- %r   )r#   RequestedMetricsr   r   r   r5   OPTIONALr8   r6   r7   r   r   r   r   r   r*   rO   r   r   r   <module>rR      s'  2  N N >
K>
K  &9L9LdTYZ78<			  &#	 :") %( $#	 8"' #$!#I*E"9cr   