mirror of
https://github.com/vale46n1/immich_duplicate_finder.git
synced 2025-12-13 20:35:46 +01:00
Refactor delete function
Refactor delete function to prevent repeated refresh of the vector database by introducing a new state variable
This commit is contained in:
140
imageProcessing.py
Normal file
140
imageProcessing.py
Normal file
@@ -0,0 +1,140 @@
|
||||
import streamlit as st
|
||||
import time
|
||||
from imagehash import phash
|
||||
from db import saveAssetInfoToDb, isAssetProcessed
|
||||
from immichApi import streamAsset
|
||||
import gc
|
||||
from faissCalc import update_faiss_index
|
||||
|
||||
def calculatepHashPhotos(assets, immich_server_url, api_key):
    """Compute and store a perceptual hash for every asset not yet processed.

    Iterates over ``assets``; for each asset that ``isAssetProcessed`` does
    not report as processed, the image is fetched with ``streamAsset``,
    hashed with ``phash`` and saved through ``saveAssetInfoToDb``. Progress
    and status text are rendered with Streamlit widgets and mirrored in
    ``st.session_state['progress']`` / ``st.session_state['message']``.

    Args:
        assets: Sequence of asset dicts; each is read via ``asset.get('id')``.
        immich_server_url: Server URL forwarded to ``streamAsset``.
        api_key: API key forwarded to ``streamAsset``.
    """
    # The 'Start Processing' button is only rendered once a message already
    # exists in the session (short-circuit `or`); pressing it clears the log.
    if 'message' not in st.session_state or st.button('Start Processing'):
        st.session_state['message'] = ""
    if 'progress' not in st.session_state:
        st.session_state['progress'] = 0

    progress_bar = st.progress(st.session_state['progress'])
    stop_button = st.button('Stop Processing')
    message_placeholder = st.empty()

    total_assets = len(assets)
    processed_assets = 0
    skipped_assets = 0
    error_assets = 0
    total_time = 0
    stopped = False

    for i, asset in enumerate(assets):
        if stop_button:
            stopped = True
            st.session_state['message'] += "Processing stopped by user.\n"
            message_placeholder.text(st.session_state['message'])
            break

        asset_id = asset.get('id')
        start_time = time.time()

        if not isAssetProcessed(asset_id):
            image = streamAsset(asset_id, immich_server_url, "Original Photo (slow)", api_key)
            if image is not None:
                image_phash = phash(image)
                saveAssetInfoToDb(asset_id, str(image_phash), asset)
                processed_assets += 1
                st.session_state['message'] += f"Processed and saved asset {asset_id}\n"

                # Explicitly delete the image object and free memory
                del image
                gc.collect()
            else:
                st.session_state['message'] += f"Failed to fetch image for asset {asset_id}\n"
                error_assets += 1
        else:
            st.session_state['message'] += f"Asset {asset_id} has already been processed. Skipping.\n"
            skipped_assets += 1

        total_time += time.time() - start_time

        # Estimate from the assets actually visited so far: skipped and failed
        # assets are done too and must not be counted as still outstanding.
        visited_assets = i + 1
        average_time_per_asset = total_time / visited_assets
        estimated_time_remaining = average_time_per_asset * (total_assets - visited_assets)
        estimated_time_remaining_min = int(estimated_time_remaining / 60)

        # Update the UI
        progress_percentage = visited_assets / total_assets
        st.session_state['progress'] = progress_percentage
        progress_bar.progress(progress_percentage)

        st.session_state['message'] += f"Estimated time remaining: {estimated_time_remaining_min} minutes\n"
        st.session_state['message'] += f"Asset {i + 1} / {total_assets} - (processed {processed_assets} - skipped {skipped_assets} - error {error_assets})\n"
        message_placeholder.text(st.session_state['message'])  # Update the placeholder with the new message
        # Start the next iteration with an empty log so only the latest
        # asset's status is shown.
        st.session_state['message'] = ''

    # The run is complete once every asset has been visited, whether it was
    # hashed, skipped or failed; only a user stop prevents completion.
    if not stopped:
        st.session_state['message'] += "Processing complete!"
        message_placeholder.text(st.session_state['message'])
        progress_bar.progress(1.0)
|
||||
|
||||
def calculateFaissIndex(assets, immich_server_url, api_key):
    """Run ``update_faiss_index`` for every asset, reporting progress in the UI.

    Each asset id is passed to ``update_faiss_index``; its returned status
    (``'processed'``, ``'skipped'`` or ``'error'``) is tallied. Any other
    status is ignored. Progress and status text are rendered with Streamlit
    widgets and mirrored in ``st.session_state``.

    Args:
        assets: Sequence of asset dicts; each is read via ``asset.get('id')``.
        immich_server_url: Server URL forwarded to ``update_faiss_index``.
        api_key: API key forwarded to ``update_faiss_index``.
    """
    # Initialize session state variables if they are not already set
    if 'message' not in st.session_state:
        st.session_state['message'] = ""
    if 'progress' not in st.session_state:
        st.session_state['progress'] = 0
    if 'stop_index' not in st.session_state:
        st.session_state['stop_index'] = False

    # Set up the UI components
    progress_bar = st.progress(st.session_state['progress'])
    stop_button = st.button('Stop Index Processing')
    message_placeholder = st.empty()

    # Record a stop request and clear the flag that asks for index calculation
    if stop_button:
        st.session_state['stop_index'] = True
        st.session_state['calculate_faiss'] = False

    total_assets = len(assets)
    processed_assets = 0
    skipped_assets = 0
    error_assets = 0
    total_time = 0
    stopped = False

    for i, asset in enumerate(assets):
        if st.session_state['stop_index']:
            stopped = True
            st.session_state['message'] = "Processing stopped by user."
            message_placeholder.text(st.session_state['message'])
            break  # Break the loop if stop is requested

        asset_id = asset.get('id')
        start_time = time.time()

        status = update_faiss_index(immich_server_url, api_key, asset_id)
        if status == 'processed':
            processed_assets += 1
        elif status == 'skipped':
            skipped_assets += 1
        elif status == 'error':
            error_assets += 1

        total_time += time.time() - start_time

        # Update progress and messages
        visited_assets = i + 1
        progress_percentage = visited_assets / total_assets
        st.session_state['progress'] = progress_percentage
        progress_bar.progress(progress_percentage)
        estimated_time_remaining = (total_time / visited_assets) * (total_assets - visited_assets)
        estimated_time_remaining_min = int(estimated_time_remaining / 60)

        st.session_state['message'] = f"Processing asset {i + 1}/{total_assets} - (Processed: {processed_assets}, Skipped: {skipped_assets}, Errors: {error_assets}). Estimated time remaining: {estimated_time_remaining_min} minutes."
        message_placeholder.text(st.session_state['message'])

    # Reset stop flag at the end of processing
    st.session_state['stop_index'] = False

    # Complete means every asset was visited; skipped or errored assets must
    # not prevent the completion message from being shown.
    if not stopped:
        st.session_state['message'] = "Processing complete!"
        message_placeholder.text(st.session_state['message'])
        progress_bar.progress(1.0)
|
||||
50
utility.py
50
utility.py
@@ -1,8 +1,8 @@
|
||||
from datetime import datetime
|
||||
import streamlit as st
|
||||
from datetime import datetime
|
||||
from api import deleteAsset, updateAsset
|
||||
from db import delete_duplicate_pair
|
||||
from immichApi import deleteAsset
|
||||
from db import getHashFromDb
|
||||
|
||||
def compare_and_color_data(value1, value2):
|
||||
date1 = datetime.fromisoformat(value1.rstrip('Z'))
|
||||
@@ -24,7 +24,7 @@ def compare_and_color(value1, value2):
|
||||
else:
|
||||
return f"{value1}"
|
||||
|
||||
def display_asset_column(col, asset1_info, asset2_info, asset_id_1,asset_id_2, server_url, api_key):
|
||||
def display_asset_column(col, asset1_info, asset2_info, asset_id_1, server_url, api_key):
|
||||
details = f"""
|
||||
- **File name:** {asset1_info[1]}
|
||||
- **Photo with ID:** {asset_id_1}
|
||||
@@ -47,11 +47,47 @@ def display_asset_column(col, asset1_info, asset2_info, asset_id_1,asset_id_2, s
|
||||
if deleteAsset(server_url, asset_id_1, api_key):
|
||||
st.success(f"Deleted photo {asset_id_1}")
|
||||
st.session_state[f'deleted_photo_{asset_id_1}'] = True
|
||||
st.session_state['show_faiss_duplicate'] = False
|
||||
#remove from asset db
|
||||
delete_duplicate_pair(asset_id_1,asset_id_2)
|
||||
else:
|
||||
st.error(f"Failed to delete photo {asset_id_1}")
|
||||
except Exception as e:
|
||||
st.error(f"An error occurred: {str(e)}")
|
||||
print(f"Failed to delete photo {asset_id_1}: {str(e)}")
|
||||
print(f"Failed to delete photo {asset_id_1}: {str(e)}")
|
||||
|
||||
def findDuplicatesHash(assets, model):
    """Find and return duplicates based on file hash, correlating specific resolutions.

    Args:
        assets: Iterable of asset dicts. Reads ``id``, ``thumbhash``,
            ``isTrashed`` and ``exifInfo`` (``exifImageHeight`` /
            ``exifImageWidth``).
        model: ``'dbhash'`` looks the hash up with ``getHashFromDb``; any
            other value (including ``'thumbhash'``) uses the asset's
            ``thumbhash`` field.

    Returns:
        A ``(duplicates, resolution_counts)`` tuple. ``duplicates`` is a list
        of ``(first_seen_asset, duplicate_asset)`` pairs sharing a hash;
        ``resolution_counts`` maps a ``"height x width"`` label to how often
        that resolution appeared on either side of a duplicate pair.
    """
    seen_hashes = {}
    duplicates = []
    resolution_counts = {}  # Track resolution correlations for the same hash

    # Trashed assets are only considered when the session flag is set.
    include_trashed = st.session_state.get('is_trashed', False)

    for asset in assets:
        if not include_trashed and asset.get('isTrashed', False):
            continue  # Skip trashed assets

        if model == 'dbhash':
            file_hash = getHashFromDb(asset.get('id'))
        else:
            file_hash = asset.get('thumbhash')

        # An asset without a hash cannot be compared; without this guard all
        # hash-less assets would share the key None and be reported as
        # duplicates of one another.
        if not file_hash:
            continue

        prev_asset = seen_hashes.get(file_hash)
        if prev_asset is None:
            seen_hashes[file_hash] = asset
            continue

        # Add the current asset as a duplicate of the first one seen
        duplicates.append((prev_asset, asset))

        # Count the resolution of both members of the pair
        for member in (asset, prev_asset):
            label = _resolution_label(member)
            resolution_counts[label] = resolution_counts.get(label, 0) + 1

    return duplicates, resolution_counts


def _resolution_label(asset):
    """Return the asset's resolution as a ``"height x width"`` string."""
    # `or {}` also covers an 'exifInfo' key that is present but None.
    exif = asset.get('exifInfo') or {}
    height = exif.get('exifImageHeight', 'Unknown')
    width = exif.get('exifImageWidth', 'Unknown')
    return "{} x {}".format(height, width)
|
||||
Reference in New Issue
Block a user