
    
3jH                        S r SSKrSSKrSSKJr  SSKJrJrJrJ	r	J
r
JrJr  SrSrSrSrS	\S
\\\4   4S jrS\	\\\4      S
\\\4   4S jr\" SS9 " S S5      5       rS\S\
\   S\
\   S
\4S jrS\S
\4S jrS\\\4   S
\\\4   4S jrS\\\\\4   \\\\4   S
\\\\\\4   4   4S jrS\	\\\\\4      S\\\4   S\S\
\   S\S
\
\\\\\4   \4   \\\4   4      4S  jrS\\\\\\\4   4   S
\\\
\   4   4S! jrS\\\\\4   \\\4   S
\\\	\\\4      \\\4   4   4S" jrS\	\\\4      S\\\4   S\S\S
\
\\	\	\\\4         \\\4   4      4
S# jrS\	\\\4      S$\\\4   S\S
\
\	\
\         4S% jr g)&a  
Internal multiprocessing helpers for DeepDiff.

Phase 1 scope: parallelize the (added_hash x removed_hash) rough-distance loop
in ``DeepDiff._get_most_in_common_pairs_in_iterables`` for ``ignore_order=True``.

Determinism contract (see docs/multi_processing.md):
- Pair selection happens in the parent only.
- Workers compute distances. The parent submits jobs in a stable index order
  matching the serial nested loop and merges results by that index.
- Worker completion order (``as_completed``) never affects the public output.

Only module-level callables live here so the module is safe under the
``spawn`` start method (macOS/Windows).
    N)	dataclass)AnyCallableDictListOptionalTuplecast   @   )z
DIFF COUNTzPASSES COUNTzDISTANCE CACHE HIT COUNT)zMAX PASS LIMIT REACHEDzMAX DIFF LIMIT REACHEDdiff_instancereturnc                     [        U SS5      =(       d    0 n0 n[         H)  n[        UR                  US5      =(       d    S5      X#'   M+     [         H   n[        UR                  US5      5      X#'   M"     U$ )aS  Pull a small, picklable stats snapshot off a worker-local DeepDiff.

Returns a dict with integer counters plus boolean limit flags. Missing keys
are tolerated so this stays robust if ``_stats`` shrinks at the end of
``__init__`` (it currently deletes ``DISTANCE CACHE ENABLED`` and the
``PREVIOUS *`` bookkeeping keys before we get here).
_statsNr   F)getattr_WORKER_STATS_COUNTER_KEYSintget_WORKER_STATS_FLAG_KEYSbool)r   statsdeltakeys       S/home/wildlama/miniconda3/lib/python3.13/site-packages/deepdiff/_multiprocessing.py_extract_worker_statsr   "   sh     M8T28bEE)3*/a0
 *&%))C/0
 'L    deltasc                 J   [          Vs0 s H  oS_M     nn[         H  nSX!'   M	     U  Hp  nU(       d  M  [          H1  nX!==   [        UR                  US5      =(       d    S5      -  ss'   M3     [         H  nUR                  U5      (       d  M  SX!'   M!     Mr     U$ s  snf )z?Sum counter keys and OR-merge limit flags across worker deltas.r   FT)r   r   r   r   )r   r   outr   s       r   _aggregate_worker_statsr    3   s    -GH-Gc6-GCH& '-CHEIIc1-233H .*Cyy~~ +  J Is   B T)frozenc                   J    \ rS rSr% Sr\\S'   \\S'   \\S'   S\S\4S jrS	r	g
)MPConfigC   z2Normalized internal multiprocessing configuration.enabledworkers	thresholdn_jobsr   c                 p    U R                   =(       a$    U R                  S:  =(       a    XR                  :  $ )N   r%   r&   r'   )selfr(   s     r   should_parallelizeMPConfig.should_parallelizeJ   s&    ||Mq 0MV~~5MMr    N)
__name__
__module____qualname____firstlineno____doc__r   __annotations__r   r-   __static_attributes__r/   r   r   r#   r#   C   s*    <MLNN N Nr   r#   multiprocessingmultiprocessing_workersmultiprocessing_thresholdc                 ~   U S;  a  [        SU < 35      e[        U 5      nUc/  [        R                  " 5       =(       d    Sn[	        [
        U5      nO,[        U[        5      (       a  US:  a  [        SU< 35      eUnUc  [        nO,[        U[        5      (       a  US:  a  [        SU< 35      eUn[        X5US9$ )zValidate and normalize the public multiprocessing parameters.

``multiprocessing`` accepts True/False. ``multiprocessing_workers`` accepts
None or a positive int. ``multiprocessing_threshold`` accepts None or a
non-negative int.
)TFr   r*   z+multiprocessing must be True or False; got r*   z@multiprocessing_workers must be None or a positive integer; got r   zFmultiprocessing_threshold must be None or a non-negative integer; got r+   )

ValueErrorr   os	cpu_countminDEFAULT_MAX_WORKERS
isinstancer   DEFAULT_THRESHOLDr#   )r7   r8   r9   r%   cpur&   r'   s          r   normalize_mp_configrC   N   s     11?NP
 	
 ?#G&lln!)3/1377;RUV;V*-  * (%	3S99=VYZ=Z,/  .	G	JJr   objc                 R     [         R                  " U 5        g! [         a     gf = f)zReturn True if ``obj`` round-trips through ``pickle.dumps`` cleanly.

Used to decide whether parallel execution is safe for a given input.
A False result triggers serial fallback for that section.
TF)pickledumps	Exception)rD   s    r   is_pickleablerI   w   s(    S s    
&&
parametersc                 b   [        U 5      nSUS'   [        SSSS9US'   UR                  SS5        UR                  S	S5        UR                  S
S5        UR                  SS5        UR                  SS5        UR                  SS5        UR                  SS5        UR                  SS5        U$ )a  Strip parent-process-only state from a ``_parameters`` snapshot.

The parent's ``_parameters`` may carry references that should not be reused
inside a worker (mutable shared caches) or that would cause nested
multiprocessing inside the worker. This produces a copy safe to ship.
Fr7   r*   r   r+   
_mp_config_distance_cacheNhashes_numpy_pathsr   group_by_keystree_iterable_opcodesis_root)dictr#   pop)rJ   	sanitizeds     r   _sanitize_parameters_for_workerrW      s     Z I
 $)I &ua1MIlMM#T*MM(D!MM.$'MM(D!MM/4(MM&$MM%t,MM)T"r   jobc           
          SSK Jn  SSKJn  U u  p4pVpxU" UUUUUUSS9n	U[	        [
        U	R                  5       5      [        U	5      4$ )aM  Compute the rough distance between two items in a worker process.

``job`` layout matches what ``compute_distances_parallel`` ships:
``(job_index, sanitized_parameters, removed_item, added_item,
    original_type, iterable_compare_func)``.

The worker constructs a fresh root ``DeepDiff`` (no shared parent state),
requests the DELTA_VIEW so we hit the same code path as the serial call in
``_get_rough_distance_of_hashed_objs``, and returns the resulting float
plus a ``_extract_worker_stats`` snapshot so the parent can aggregate
diff/pass/cache-hit counts into its WORKER_* stats keys.
r   DeepDiff)
DELTA_VIEW)_parametersview_original_typeiterable_compare_funccache_purge_level)deepdiff.diffr[   deepdiff.helperr\   r
   float_get_rough_distancer   )
rX   r[   r\   	job_indexrJ   removed_item
added_itemoriginal_typer`   diffs
             r   _distance_workerrk      s]    " '*\_YI<]$3
 D d5$":":"<=?TUY?ZZZr   jobsri   r`   configc           	         U (       d  0 [        / 5      4$ [        U5      n[        U5      (       d  gUb  [        U5      (       d  g[        U S   5      (       d  gSSKJnJn  / n[        U 5       H#  u  pU
S   nU
S   nUR                  XXX#45        M%     0 n/ n U" UR                  S9 nU Vs/ s H  nUR                  [        U5      PM     nnU" U5       H-  nUR                  5       u  nnnUUU'   UR                  U5        M/     SSS5        0 n[        U 5       H  u  pX   UU
S   U
S   4'   M     U[        U5      4$ s  snf ! , (       d  f       NE= f! [        R                  [        [        4 a     gf = f)a  Run ``_distance_worker`` over ``jobs`` and return distances by pair.

``jobs`` is a list of ``(added_hash, removed_hash, added_item, removed_item)``
tuples in the exact order the serial nested loop visits them. The parent
is responsible for that ordering; this helper does not reorder anything.

Returns:
    ``(distances_by_pair, aggregated_worker_stats)`` where the first item
    is a dict ``{(added_hash, removed_hash): distance}`` and the second is
    the aggregated ``_extract_worker_stats`` snapshot summed across all
    workers (counter keys summed, limit flags OR-merged). Returns
    ``None`` if the section is unsafe to parallelize (unpickleable
    inputs/parameters, worker import error, etc.). On ``None`` the caller
    MUST fall back to the serial path so correctness is preserved.

Workers may finish out of order; we collect results into a dict keyed by
the original job index, so callers see the same result regardless of
completion order.
Nr   ProcessPoolExecutoras_completed      max_workersr*   )r    rW   rI   concurrent.futuresrp   rq   	enumerateappendr&   submitrk   resultrF   PicklingErrorAttributeError	TypeError)rl   rJ   ri   r`   rm   sanitized_paramsrp   rq   payloadsirX   rh   rg   results_by_indexstats_deltasexecutorpayloadfuturesfutureidxdistancestats_deltar   s                          r   compute_distances_parallelr      s   4 *2...6zB )**(?T1U1U a!! EHD/V
1v,Ma	
 " *,)+L V^^<QYZQYgx'7AQYGZ&w/ .4]]_*X{(0 %##K0 0 = )+CD/ 0 3SVSV "'555! [ =<   .)<  	sB   E *E	/ E;E	
E E		
EE E E<;E<c                     SSK Jn  SSKJn  U u  p4pVU" U4SUSS.UD6n Xt   nXL a  US4$ X84$ ! [         a    US4s $ f = f)uI  Hash a single iterable item in a worker process.

``job`` layout: ``(job_index, item, parent_path, deephash_parameters)``.
The worker constructs a fresh ``DeepHash`` (no shared parent state) and
looks up the resulting top-level hash for ``item``. Returns
``(job_index, item_hash)`` where ``item_hash`` is None if the item could
not be processed — the parent treats that exactly like the serial path's
``KeyError`` / ``unprocessed`` skip.

UnicodeDecodeError and NotImplementedError propagate as in the serial
path; other exceptions surface in the parent through ``future.result()``.
r   )DeepHash)unprocessedNT)rN   parent
apply_hash)deepdiff.deephashr   rc   r   KeyError)	rX   r   r   rf   itemparent_pathrJ   	deep_hash	item_hashs	            r   _hash_workerr     s{     ++/2,I[	
 IO	 $	  $s   / A A c           	          SSK Jn  SSKJn  U u  p4pVnU" XVUUUSS9n/ n	UR                  R                  5        H(  u  pU
S:X  a  M  U H  nU	R                  X45        M     M*     X9[        U5      4$ )ai  Run one paired-item subtree diff in a worker process.

``job`` layout: ``(job_index, sanitized_parameters, t1, t2, _original_type)``.
The worker constructs a fresh root ``DeepDiff`` (no shared parent state),
requests the TREE_VIEW so ``self.tree`` is populated and walks it once to
flatten the leaves into ``[(report_type, leaf_difflevel), ...]``.

The parent rebases each leaf's up-chain onto its own ``change_level`` so
paths come out as if the diff had run inline. Returning bare DiffLevel
objects is acceptable here because we already proved they pickle and
re-attach cleanly (see tests/test_multiprocessing.py).
r   rZ   )	TREE_VIEW)r]   r^   r_   ra   deep_distance)rb   r[   rc   r   rQ   itemsrx   r   )rX   r[   r   rf   rJ   t1t2r_   rj   entriesreport_typelevelsleafs                r   _subtree_diff_workerr   2  s    " ')471I2>
% D &(G#yy0/)DNNK./   1
 4T:::r   c           
         U (       d  / [        / 5      4$ [        U5      n[        U5      (       d  g[        U S   5      (       d  gSSKJnJn  [        U 5       VVV	s/ s H  u  nu  pXtXU4PM     n
nnn	0 n/ n U" UR                  S9 nU
 Vs/ s H  oR                  [        U5      PM     nnU" U5       H-  nUR                  5       u  nnnUUU'   UR                  U5        M/     SSS5        [!        [#        U 5      5       Vs/ s H  o{U   PM	     sn[        U5      4$ s  sn	nnf s  snf ! , (       d  f       NL= f! [        R                  [        [        4 a     gf = fs  snf )u  Run ``_subtree_diff_worker`` over ``jobs`` and return per-job entries.

``jobs`` is a list of ``(t1_item, t2_item)`` tuples in the exact order
the serial paired-iteration code visits them. Returns
``(entries_by_job, aggregated_worker_stats)`` where ``entries_by_job`` is
a list aligned to job order — each element is ``[(report_type,
leaf_difflevel), ...]`` suitable for the parent to rebase and merge into
its tree — and ``aggregated_worker_stats`` is the per-batch ``_stats``
deltas summed across workers (counters summed, limit flags OR-merged).
Returns ``None`` when the section is unsafe to parallelize (unpickleable
parameters/items, worker import error). On ``None`` the caller MUST run
the same jobs serially so correctness is preserved.

Workers may finish out of order; results are collected by their original
job index so the merge order is identical regardless of completion order.
Nr   ro   rt   )r    rW   rI   rv   rp   rq   rw   r&   ry   r   rz   rx   rF   r{   r|   r}   rangelen)rl   rJ   ri   rm   r~   rp   rq   r   t1_itemt2_itemr   r   r   r   r   r   r   r   r   r   s                       r   compute_subtree_diffs_parallelr   Y  si   , *2...6zB)** a!!D &/t_%4!A! 
g>%4  
 :<)+L V^^<U]^U]'';WEU]G^&w/,2MMO)Wk(/ %##K0 0 = ',CI&67&6!	&67- # _ =<   .)<  	8sN   D"9D? 	D.D)-;D.(D? E$)D..
D<8D? <D? ?E! E!deephash_parametersc           	      b   U (       d  / $ [        U5      (       d  g[        U S   5      (       d  gSSKJnJn  [	        U 5       VVVs/ s H  u  nu  pgXVXq4PM     nnnn0 n	 U" UR
                  S9 n
U Vs/ s H  oR                  [        U5      PM     nnU" U5       H  nUR                  5       u  pXU'   M     SSS5        [        [        U 5      5       Vs/ s H  oYU   PM	     sn$ s  snnnf s  snf ! , (       d  f       NA= f! [        R                  [        [        4 a     gf = fs  snf )u  Run ``_hash_worker`` over ``jobs`` and return per-item hashes.

``jobs`` is a list of ``(item, parent_path)`` tuples in the exact order
the serial enumerate-loop visits them. Returns a list aligned to that
order, with ``None`` for items the worker could not hash. Returns
``None`` when the section is unsafe to parallelize (unpickleable
parameters/items, worker import error). On ``None`` the caller MUST fall
back to the serial path.

Workers may finish out of order; results are collected by their original
index so callers see the same output regardless of completion order.
Note: child object hashes computed inside each worker are NOT merged
back into the parent's ``self.hashes`` — id-based keys for unhashable
sub-objects would not match across process boundaries. Parent code that
relies on the iterable-level hash being present must continue to compute
it serially after the per-item parallel pass.
Nr   ro   rt   )rI   rv   rp   rq   rw   r&   ry   r   rz   rF   r{   r|   r}   r   r   )rl   r   rm   rp   rq   r   r   r   r   r   r   r   r   r   r   r   s                   r   compute_hashes_parallelr     s#   , 	,-- a!!D '0o&5"A" 
+3&5  
 24 V^^<MUVX'|W=XGV&w/!'(1% 0 = */s4y)9:)9AQ)9:: W =<
   .)<  ;sN   C* D 0C65C1'C6;D D,1C66
D D D D)(D))!r4   r<   rF   dataclassesr   typingr   r   r   r   r   r	   r
   r?   rA   r   r   strr   r    r#   r   rC   r   rI   rW   rd   rk   r   r   r   r   r   r/   r   r   <module>r      s'    
  ! C C C    X N  c3h "Dc3h$8 T#s(^   $N N N&K&K%c]&K  (}&K 	&KR
s 
t 
S#X 4S> 2"[	sDcNCc36	7"[
3tCH~%&"[JK6
uS#sC'(
)K6S#XK6 K6 $H-	K6
 K6 eDsCx%/0$sCx.@ABK6\ eCc4S>9:  uS(SV-EW?X  D$;	sDcNCc1	2$;
3U38_%tCH~56$;N8
uS#X
8S#X8 8 	8
 eDeCHo./c3h?@A8v1;
uS#X
1;c3h1; 1; d8C=!"	1;r   