
    +jAP                        d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dl	m
Z
mZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ d dlmZmZ d dlmZmZm Z m!Z! d dl"m#Z#m$Z$ d dl%m&Z& d d	l'm(Z( d d
l)m*Z*m+Z+m,Z, d dl-m.Z.  G d de          Z/ G d de          Z0ed         Z1de1de2e3         fdZ4de2e3         fdZ5de2e3         fdZ6	 	 d9de1de7de7de8e3         dz  fdZ9de1de8e3         fdZ:de2e3         de;fdZ<de=e1df         de2e3         fdZ>	 	 d:d!e2e3         d"e8e3         d#e7d$e7de=e2e         e8e3         e;f         f
d%Z?d&e2e         d'e8e3         de;fd(Z@d ZAd)ZBd*ZCeAd+fde=e1df         d,e;d-e;de2fd.ZD	 	 	 	 d;d/e3d0e3d1e3d2e7d3e7d4eg e7f         dz  d5eEe3e#f         dz  de;fd6ZF	 	 	 	 d;d7e2d2e7d3e7d4eg e7f         dz  d5eEe3e#f         dz  de=e;e2e3         f         fd8ZGdS )<    N)Path)CallableLiteral	TypedDict)add_missing_tag_for_asset_idbulk_update_enrichment_levelbulk_update_is_missingbulk_update_needs_verifydelete_orphaned_seed_assetdelete_references_by_idsensure_tags_existget_asset_by_hashget_reference_by_idget_references_for_prefixesget_unenriched_references(mark_references_missing_outside_prefixesreassign_asset_referencesremove_missing_tag_for_asset_idset_reference_system_metadataupdate_asset_hash_and_mime)SeedAssetSpecbatch_insert_seed_assets)get_mtime_ns
is_visiblelist_files_recursivelyverify_file_unchanged)HashCheckpointcompute_blake3_hash)extract_image_dimensions)extract_file_metadata)compute_relative_filenameget_comfy_models_folders!get_name_and_tags_from_asset_path)create_sessionc                   B    e Zd ZU eed<   eed<   eed<   eed<   eed<   dS )_RefInforef_id	file_pathexistsstat_unchangedneeds_verifyN)__name__
__module____qualname__str__annotations__bool     2/home/wildlama/comfy/ComfyUI/app/assets/scanner.pyr&   r&   .   sE         KKKNNNLLLr3   r&   c                   @    e Zd ZU edz  ed<   eed<   ee         ed<   dS )_AssetAccumulatorNhashsize_dbrefs)r,   r-   r.   r/   r0   intlistr&   r2   r3   r4   r6   r6   6   s;         
*LLL
x.r3   r6   modelsinputoutputrootreturnc                 \   | dk    r7g }t                      D ]\  }}|                    |           d |D             S | dk    r1t          j                            t          j                              gS | dk    r1t          j                            t          j                              gS g S )Nr=   c                 L    g | ]!}t           j                            |          "S r2   )ospathabspath).0ps     r4   
<listcomp>z)get_prefixes_for_root.<locals>.<listcomp>D   s&    222q""222r3   r>   r?   )r"   extendrD   rE   rF   folder_pathsget_input_directoryget_output_directory)r@   bases_bucketpathss       r4   get_prefixes_for_rootrQ   ?   s    x688 	  	 NGULL22E2222w @ B BCCDDx A C CDDEEIr3   c                      d} d | D             S )z3Get all known asset prefixes across all root types.r<   c                 6    g | ]}t          |          D ]}|S r2   )rQ   )rG   r@   rH   s      r4   rI   z*get_all_known_prefixes.<locals>.<listcomp>O   s.    III$-B4-H-HIIAIIIIr3   r2   )	all_rootss    r4   get_all_known_prefixesrU   L   s    &CIII)IIIIr3   c                     g } t                      D ]\  }}t          j        |          pg }|D ]}t          d t	          |          j        D                       s.t          j        ||          }|sFt          j        	                    |          }d}t	          |          }|D ]8}|
                    t          j        	                    |                    rd} n9|r|                     |           Ɍ| S )Nc              3   4   K   | ]}t          |          V  d S )N)r   )rG   parts     r4   	<genexpr>z'collect_models_files.<locals>.<genexpr>W   s*      IIDz$''IIIIIIr3   FT)r"   rK   get_filename_listallr   partsget_full_pathrD   rE   rF   is_relative_toappend)	outfolder_namerN   	rel_filesrel_pathabs_pathallowedabs_pbs	            r4   collect_models_filesrh   R   s   C688 % %U 2;??E2	! 	% 	%HIIDNN4HIIIII #1+xHHH wx00HGNNE  ''(:(:;; "GE  %

8$$$	% Jr3   Fcollect_existing_pathsupdate_missing_tagsc           	      P   t          |          }|s|rt                      ndS t          | ||          }i }|D ]}|                    |j                  }||j        |j        g d}|||j        <   d}		 d}
t          |j        |d         t          j
        |j        d                    }	nh# t          $ r d}
Y nZt          $ r d}
t          j        d	|j                   Y n3t           $ r'}d}
t          j        d
|j        |           Y d}~nd}~ww xY w|d                             |j        |j        |
|	|j        d           g }g }g }g }g }t                      }|                                D ]5\  }}|d         }|d         }t+          d |D                       }t-          d |D                       }|D ]}|d         s|                    |d                    &|d         r>|                    |d                    |d         r|                    |d                    |d         s#|d         s|                    |d                    |\|r|rt/          | |           nE|D ]B}|d         r8|                    t          j                            |d                              C@|rk|D ]%}|d         s|                    |d                    &|r@	 t7          | |           nq# t8          $ r }t          j        d||           Y d}~nLd}~ww xY wnC|rA	 t=          | |d           n-# t8          $ r }t          j        d||           Y d}~nd}~ww xY w|D ]B}|d         r8|                    t          j                            |d                              C7t?          | |           t          |          fd|D             }tA          | |d           tA          | |d           tC          | |d           tC          | |d           |r|ndS )a  Reconcile asset references with filesystem for a root.

    - Toggle needs_verify per reference using mtime/size stat check
    - For hashed assets with at least one stat-unchanged ref: delete stale missing refs
    - For seed assets with all refs missing: delete Asset and its references
    - Optionally add/remove 'missing' tags based on stat check in this root
    - Optionally return surviving absolute paths

    Args:
        session: Database session
        root: Root type to scan
        collect_existing_paths: If True, return set of surviving file paths
        update_missing_tags: If True, update 'missing' tags based on file status

    Returns:
        Set of surviving absolute paths if collect_existing_paths=True, else None
    N)include_missing)r7   r8   r9   FTr8   follow_symlinks)mtime_dbr8   stat_resultzPermission denied accessing %szOSError checking %s: %sr9   )r'   r(   r)   r*   r+   r7   c              3   &   K   | ]}|d          V  dS )r*   Nr2   rG   rs     r4   rY   z2sync_references_with_filesystem.<locals>.<genexpr>   s(      >>AA./>>>>>>r3   c              3   (   K   | ]}|d           V  dS )r)   Nr2   rr   s     r4   rY   z2sync_references_with_filesystem.<locals>.<genexpr>   s(      88aak/888888r3   r)   r'   r*   r+   r(   )asset_idz-Failed to remove missing tag for asset %s: %s	automatic)ru   originz*Failed to add missing tag for asset %s: %sc                     g | ]}|v|	S r2   r2   )rG   r'   	stale_sets     r4   rI   z3sync_references_with_filesystem.<locals>.<listcomp>   s#    WWW&vY?V?Vv?V?V?Vr3   )value)"rQ   setr   getru   
asset_hash
size_bytesr   mtime_nsrD   statr(   FileNotFoundErrorPermissionErrorloggingdebugOSErrorr_   reference_idr+   itemsanyr[   r   addrE   rF   r   	Exceptionwarningr   r   r	   r
   )sessionr@   ri   rj   prefixesrowsby_assetrowaccr*   r)   eto_set_verifyto_clear_verifystale_ref_idsto_mark_missingto_clear_missing	survivorsaida_hashr9   any_unchangedall_missingrs   ry   s                           @r4   sync_references_with_filesystemr   h   s{   . %T**H 9.8suuuD8&+>  D .0H 
 
ll3<((;>cnbQQC%(HS\"	GF2IGCM4HHH  NN
 ! 	 	 	FFF 	K 	K 	KFM:CMJJJJJ 	G 	G 	GFM3S]AFFFFFFFF	G 	F* ] "0 # 0 	
 	
 	
 	
  "M!#O!M!#O"$%%INN$$ -? -?SV6{>>>>>>>88488888 		2 		2AX; &&q{333!" 8 ''(444^$ 8#**1X;777%& 2q/@ 2$$Qx[111> G G*7C8888 G GA{ G!bgooan&E&EFFF 	V 6 6{ 6!((8555" 3GcJJJJJ    OGa        ! 	VV,Ws;WWWWW V V V LcSTUUUUUUUUV  	? 	?A{ ?bgooan==>>>	? Wm444M""IWWWWOWWWO7O4@@@@7$4EBBBBWm4@@@@WoUCCCC.899D8sT   78B00D>%D%	D.DD#K55
L?LL&L99
M#MM#c                 8   	 t                      5 }t          || dd          }|                                 |pt                      cddd           S # 1 swxY w Y   dS # t          $ r.}t          j        d| |           t                      cY d}~S d}~ww xY w)zzSync a single root's references with the filesystem.

    Returns survivors (existing paths) or empty set on failure.
    T)ri   rj   Nzfast DB scan failed for %s: %s)r$   r   commitr{   r   r   	exception)r@   sessr   r   s       r4   sync_root_safelyr      s    
 	&7'+$(	  I KKMMM%	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	& 	&    :D!DDDuus@   A! 7AA! AA! AA! !
B+#BBBr   c                     	 t                      5 }t          ||           }|                                 |cddd           S # 1 swxY w Y   dS # t          $ r }t	          j        d|           Y d}~dS d}~ww xY w)zMark references as missing when outside the given prefixes.

    This is a non-destructive soft-delete. Returns count marked or 0 on failure.
    Nz!marking missing assets failed: %sr   )r$   r   r   r   r   r   )r   r   countr   s       r4   $mark_missing_outside_prefixes_safelyr      s    
 	<T8LLEKKMMM	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	    =qAAAqqqqqs9   A &AA AA 
AA 
A:A55A:roots.c                 0   g }d| v r!|                     t                                 d| v r3|                     t          t          j                                         d| v r3|                     t          t          j                                         |S )z+Collect all file paths for the given roots.r=   r>   r?   )rJ   rh   r   rK   rL   rM   )r   rP   s     r4   collect_paths_for_rootsr     s    E5)++,,,%+L,L,N,NOOPPP5+L,M,O,OPPQQQLr3   TrP   existing_pathsenable_metadata_extractioncompute_hashesc                    g }t                      }d}| D ]/}t          j                            |          }||v r|dz  },	 t          j        |d          }	n# t
          $ r Y Pw xY w|	j        s\t          |          \  }
}t          |          }d}|rt          ||	|          }d}|rF	 t          |          \  }}d|z   }n-# t          $ r }t          j        d||           Y d}~nd}~ww xY w|r|j        nd}|                    ||	j        t!          |	          |
|||||dd	
           |                    |           1|||fS )
ae  Build asset specs from paths, returning (specs, tag_pool, skipped_count).

    Args:
        paths: List of file paths to process
        existing_paths: Set of paths that already exist in the database
        enable_metadata_extraction: If True, extract tier 1 & 2 metadata
        compute_hashes: If True, compute blake3 hashes (slow for large files)
    r      Trm   Nrp   relative_filenameblake3:Failed to hash %s: %s)
rd   r~   r   	info_nametagsfnamemetadatar7   	mime_typejob_id)r{   rD   rE   rF   r   r   st_sizer#   r!   r    r   r   r   r   content_typer_   r   update)rP   r   r   r   specstag_poolskippedrH   rf   stat_pnamer   	rel_fnamer   r}   digest_r   r   s                      r4   build_asset_specsr     s    "$EHG / /""N""qLG	WUD999FF 	 	 	H	~ 	6u==
d-e44	 % 	,""+  H "&
 	CC/66	&/

 C C C 7BBBBBBBBC .6?H))4	!$n(00!"$"& 	
 	
 	
 	(G##s*   A
A&%A&-C
C/C**C/r   r   c                     | sdS t                      5 }|rt          ||           t          || d          }|                                 |j        cddd           S # 1 swxY w Y   dS )zBInsert asset specs into database, returning count of created refs.r    )r   owner_idN)r$   r   r   r   inserted_refs)r   r   r   results       r4   insert_asset_specsr   `  s     q			 $T 	.dH---)$ebIII#$ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $ $s   ?AA#&A#r      i  	max_levellimitc                     g }| D ]$}|                     t          |                     %|sg S t                      5 }t          ||||          cddd           S # 1 swxY w Y   dS )a  Get assets that need enrichment for the given roots.

    Args:
        roots: Tuple of root types to scan
        max_level: Maximum enrichment level to include
        limit: Maximum number of rows to return

    Returns:
        List of UnenrichedReferenceRow
    )r   r   N)rJ   rQ   r$   r   )r   r   r   r   r@   r   s         r4   get_unenriched_assets_for_rootsr   r  s     H 5 5-d334444 				 
T((iu
 
 

 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
s   AA #A r(   r   ru   extract_metadatacompute_hashinterrupt_checkhash_checkpointsc                    t           }	 t          j        |d          }	n# t          $ r |cY S w xY wt	          |	          }
t          |          }d}d}|r"t          ||	|          }|r|j        }t          }d}|rd	 t	          |	          }|	j	        }d}|}|
                    |          }|ft          j        |d          }|j        t	          |          k    s|j        |j	        k    rd}|                    |d           nt	          |          }t          |||          \  }}|||||_        ||_        |||<   |S ||                    |d           t          j        |d          }t	          |          }||k    rt          j        d|           nd| }| p|du}|rt"          }n-# t$          $ r }t          j        d||           Y d}~nd}~ww xY wt'          | |          }||j        |
k    r0|                                  t          j        d	|           t           S |rf|rd|                                }|r=|                    d
          r(t1          ||          }|r|                    |           t5          | ||           |rqt7          | |          }|rL|j        |k    rAt;          | ||j        |           t=          | |           |rt?          | |j        |           n't?          | |||           n|rt?          | ||           tA          | |g|           | !                                 |S )a  Enrich a single asset with metadata and/or hash.

    Args:
        session: Database session (caller manages lifecycle)
        file_path: Absolute path to the file
        reference_id: ID of the reference to update
        asset_id: ID of the asset to update (for mime_type and hash)
        extract_metadata: If True, extract safetensors header and mime type
        compute_hash: If True, compute blake3 hash
        interrupt_check: Optional non-blocking callable that returns True if
            the operation should be interrupted (e.g. paused or cancelled)
        hash_checkpoints: Optional dict for saving/restoring hash progress
            across interruptions, keyed by file path

    Returns:
        New enrichment level achieved
    Trm   Nr   )r   
checkpointz1File modified during hashing, discarding hash: %sr   r   z?Ref %s mtime changed during enrichment, discarding stale resultzimage/)r   )"ENRICHMENT_STUBrD   r   r   r   r!   r    r   ENRICHMENT_METADATAr   r|   r   	file_sizepopr   r   r   ENRICHMENT_HASHEDr   r   rollbackinfoto_user_metadata
startswithr   r   r   r   idr   r   r   r   r   )r   r(   r   ru   r   r   r   r   	new_levelr   initial_mtime_nsr   r   r   	full_hashmtime_beforesize_beforer   cur_statr   new_checkpoint
stat_aftermtime_aftermetadata_okr   refsystem_metadatadimsexistings                                r4   enrich_assetr     s5   6  ID999    $F++))44I IH ,('
 
 

  	, -I+I I .C-	C'//L .K J+-11)<<
)!wy$GGGH"+|H/E/EEE)3x7GGG%)
(,,Y===='3H'='=%8 /%& & &"FN ~#/N4N.:N+/:N,2@$Y/    + $$Y555DAAAJ&z22K{** SU^____.f..	"22Jhd6J 2 1I 	C 	C 	CO3YBBBBBBBB	C g|
4
4C
{cl&666M	
 	
 	
  NH N"3355 	---h77 	-+IKKKD -&&t,,,%g|_MMM 
K$Wi88 	Px//%gxlSSS&w999 V*7HK9UUUU&w)YOOOO	 K"7H	JJJJ <.)DDDNNs,     //>CF4 A.F4 4
G>GGr   c                    d}g }t                      5 }| D ]}| |            r n	 t          ||j        |j        |j        ||||          }	|	|j        k    r|dz  }n|                    |j                   c# t          $ rS}
t          j	        d|j        |
           |
                                 |                    |j                   Y d}
~
d}
~
ww xY wddd           n# 1 swxY w Y   ||fS )a  Enrich a batch of assets.

    Uses a single DB session for the entire batch, committing after each
    individual asset to avoid long-held transactions while eliminating
    per-asset session creation overhead.

    Args:
        rows: List of UnenrichedReferenceRow from get_unenriched_assets_for_roots
        extract_metadata: If True, extract metadata for each asset
        compute_hash: If True, compute hash for each asset
        interrupt_check: Optional non-blocking callable that returns True if
            the operation should be interrupted (e.g. paused or cancelled)
        hash_checkpoints: Optional dict for saving/restoring hash progress
            across interruptions, keyed by file path

    Returns:
        Tuple of (enriched_count, failed_reference_ids)
    r   N)r(   r   ru   r   r   r   r   r   zFailed to enrich %s: %s)r$   r   r(   r   ru   enrichment_levelr_   r   r   r   r   )r   r   r   r   r   enriched
failed_idsr   r   r   r   s              r4   enrich_assets_batchr     st   2 HJ			 4T 	4 	4C*/@/@*4(!m!$!1 \%5!-$3%5	 	 		 s333MHH%%c&6777 4 4 4 93=!LLL!!#"2333333334)4 4 4 4 4 4 4 4 4 4 4 4 4 4 42 Zs<   C%AA98C%9
CA	CC%CC%%C),C))FF)TF)TFNN)Hr   rD   pathlibr   typingr   r   r   rK   app.assets.database.queriesr   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   app.assets.services.bulk_ingestr   r   app.assets.services.file_utilsr   r   r   r   app.assets.services.hashingr   r   $app.assets.services.image_dimensionsr   $app.assets.services.metadata_extractr    app.assets.services.path_utilsr!   r"   r#   app.database.dbr$   r&   r6   RootTyper;   r/   rQ   rU   rh   r1   r{   r   r   r:   r   tupler   r   r   r   r   r   r   dictr   r   r2   r3   r4   <module>r      s    				       / / / / / / / / / /                                       $                   L K K K K K K K I I I I I I F F F F F F         
 + * * * * *    y       	    ./
 
T#Y 
 
 
 
JS	 J J J Jd3i    2 $) %	9 9
9 !9 	9
 	X_9 9 9 9D8 C    (49     	53#7 	DI 	 	 	 	 (, 	C$ C$9C$HC$ !%C$ 	C$
 4C#-.C$ C$ C$ C$N	$d=1 	$SX 	$# 	$ 	$ 	$ 	$   
 %
 
3

 
 
	
 
 
 
B "159=E EE E 	E
 E E b$h'$.E 3./$6E 	E E E ET "159=5  5 
5 5  5  b$h'$.	5 
 3./$65  3S	>5  5  5  5  5  5 r3   