Refactor delete function

Refactor delete function to prevent repeated refresh of the vector database by introducing a new state variable
This commit is contained in:
Thess
2024-04-23 22:33:17 +02:00
committed by GitHub
parent f06c6de9f4
commit b353e29440
2 changed files with 183 additions and 7 deletions

140
imageProcessing.py Normal file
View File

@@ -0,0 +1,140 @@
import streamlit as st
import time
from imagehash import phash
from db import saveAssetInfoToDb, isAssetProcessed
from immichApi import streamAsset
import gc
from faissCalc import update_faiss_index
def calculatepHashPhotos(assets, immich_server_url, api_key):
    """Hash every not-yet-processed asset with ``phash`` and store the result.

    For each asset the function checks ``isAssetProcessed``; unprocessed
    assets are fetched with ``streamAsset``, hashed, and written through
    ``saveAssetInfoToDb``. Progress, counters and a rough time estimate are
    rendered with Streamlit widgets while the loop runs.

    Args:
        assets: Sequence of asset dicts; each is read via ``asset.get('id')``.
        immich_server_url: Server URL forwarded to ``streamAsset``.
        api_key: API key forwarded to ``streamAsset``.

    Returns:
        None. Results are persisted and displayed as side effects.
    """
    # Clear the status text on first use or when the user (re)starts a run.
    if 'message' not in st.session_state or st.button('Start Processing'):
        st.session_state['message'] = ""
    if 'progress' not in st.session_state:
        st.session_state['progress'] = 0

    progress_bar = st.progress(st.session_state['progress'])
    stop_button = st.button('Stop Processing')
    message_placeholder = st.empty()

    total_assets = len(assets)
    processed_assets = 0
    skipped_assets = 0
    error_assets = 0
    total_time = 0

    for i, asset in enumerate(assets):
        if stop_button:
            st.session_state['message'] += "Processing stopped by user.\n"
            message_placeholder.text(st.session_state['message'])
            break

        asset_id = asset.get('id')
        start_time = time.time()

        if not isAssetProcessed(asset_id):
            image = streamAsset(asset_id, immich_server_url, "Original Photo (slow)", api_key)
            if image is not None:
                image_phash = phash(image)
                saveAssetInfoToDb(asset_id, str(image_phash), asset)
                processed_assets += 1
                st.session_state['message'] += f"Processed and saved asset {asset_id}\n"
                # Explicitly delete the image object and free memory
                del image
                gc.collect()
            else:
                st.session_state['message'] += f"Failed to fetch image for asset {asset_id}\n"
                error_assets += 1
        else:
            st.session_state['message'] += f"Asset {asset_id} has already been processed. Skipping.\n"
            skipped_assets += 1

        total_time += time.time() - start_time

        # Average cost of an asset that actually had to be hashed. Applying
        # it to every asset still ahead is a deliberately conservative
        # estimate, since some of those may turn out to be skipped.
        average_time_per_asset = total_time / processed_assets if processed_assets > 0 else 0
        # Remaining work is everything after the current index; skipped and
        # failed assets are finished too and must not be counted again.
        remaining_assets = total_assets - (i + 1)
        estimated_time_remaining = average_time_per_asset * remaining_assets
        estimated_time_remaining_min = int(estimated_time_remaining / 60)

        # Update the UI
        progress_percentage = (i + 1) / total_assets
        st.session_state['progress'] = progress_percentage
        progress_bar.progress(progress_percentage)
        st.session_state['message'] += f"Estimated time remaining: {estimated_time_remaining_min} minutes\n"
        st.session_state['message'] += f"Asset {i + 1} / {total_assets} - (processed {processed_assets} - skipped {skipped_assets} - error {error_assets})\n"
        message_placeholder.text(st.session_state['message'])  # Update the placeholder with the new message
        # Start the next iteration with a clean status text.
        st.session_state['message'] = ''
    else:
        # Reached only when the loop was not interrupted by the stop button,
        # so completion is reported even if some assets were skipped or
        # could not be fetched.
        st.session_state['message'] += "Processing complete!"
        message_placeholder.text(st.session_state['message'])
        progress_bar.progress(1.0)
def calculateFaissIndex(assets, immich_server_url, api_key):
    """Run ``update_faiss_index`` over every asset while reporting progress.

    Each asset id is passed to ``update_faiss_index``; its return value
    ('processed', 'skipped' or 'error') is tallied and shown together with a
    progress bar and a time estimate. A stop button sets a flag in
    ``st.session_state`` that aborts the loop.

    Args:
        assets: Sequence of asset dicts; each is read via ``asset.get('id')``.
        immich_server_url: Server URL forwarded to ``update_faiss_index``.
        api_key: API key forwarded to ``update_faiss_index``.

    Returns:
        None. All output goes to Streamlit widgets and session state.
    """
    # Initialize session state variables if they are not already set
    if 'message' not in st.session_state:
        st.session_state['message'] = ""
    if 'progress' not in st.session_state:
        st.session_state['progress'] = 0
    if 'stop_index' not in st.session_state:
        st.session_state['stop_index'] = False

    # Set up the UI components
    progress_bar = st.progress(st.session_state['progress'])
    stop_button = st.button('Stop Index Processing')
    message_placeholder = st.empty()

    # Record the stop request and clear the flag that triggers indexing
    if stop_button:
        st.session_state['stop_index'] = True
        st.session_state['calculate_faiss'] = False

    total_assets = len(assets)
    processed_assets = 0
    skipped_assets = 0
    error_assets = 0
    total_time = 0

    for i, asset in enumerate(assets):
        if st.session_state['stop_index']:
            st.session_state['message'] = "Processing stopped by user."
            message_placeholder.text(st.session_state['message'])
            break  # Break the loop if stop is requested

        asset_id = asset.get('id')
        start_time = time.time()
        status = update_faiss_index(immich_server_url, api_key, asset_id)
        if status == 'processed':
            processed_assets += 1
        elif status == 'skipped':
            skipped_assets += 1
        elif status == 'error':
            error_assets += 1
        total_time += time.time() - start_time

        # Update progress and messages
        progress_percentage = (i + 1) / total_assets
        st.session_state['progress'] = progress_percentage
        progress_bar.progress(progress_percentage)
        estimated_time_remaining = (total_time / (i + 1)) * (total_assets - (i + 1))
        estimated_time_remaining_min = int(estimated_time_remaining / 60)
        st.session_state['message'] = f"Processing asset {i + 1}/{total_assets} - (Processed: {processed_assets}, Skipped: {skipped_assets}, Errors: {error_assets}). Estimated time remaining: {estimated_time_remaining_min} minutes."
        message_placeholder.text(st.session_state['message'])
    else:
        # The loop ran to the end without a stop request. Report completion
        # regardless of how many assets were skipped or failed; requiring
        # every asset to be freshly 'processed' would hide the message on
        # any rerun over partially indexed data.
        st.session_state['message'] = "Processing complete!"
        message_placeholder.text(st.session_state['message'])
        progress_bar.progress(1.0)

    # Reset stop flag at the end of processing
    st.session_state['stop_index'] = False

View File

@@ -1,8 +1,8 @@
from datetime import datetime
import streamlit as st
from datetime import datetime
from api import deleteAsset, updateAsset
from db import delete_duplicate_pair
from immichApi import deleteAsset
from db import getHashFromDb
def compare_and_color_data(value1, value2):
date1 = datetime.fromisoformat(value1.rstrip('Z'))
@@ -24,7 +24,7 @@ def compare_and_color(value1, value2):
else:
return f"{value1}"
def display_asset_column(col, asset1_info, asset2_info, asset_id_1,asset_id_2, server_url, api_key):
def display_asset_column(col, asset1_info, asset2_info, asset_id_1, server_url, api_key):
details = f"""
- **File name:** {asset1_info[1]}
- **Photo with ID:** {asset_id_1}
@@ -47,11 +47,47 @@ def display_asset_column(col, asset1_info, asset2_info, asset_id_1,asset_id_2, s
if deleteAsset(server_url, asset_id_1, api_key):
st.success(f"Deleted photo {asset_id_1}")
st.session_state[f'deleted_photo_{asset_id_1}'] = True
st.session_state['show_faiss_duplicate'] = False
#remove from asset db
delete_duplicate_pair(asset_id_1,asset_id_2)
else:
st.error(f"Failed to delete photo {asset_id_1}")
except Exception as e:
st.error(f"An error occurred: {str(e)}")
print(f"Failed to delete photo {asset_id_1}: {str(e)}")
print(f"Failed to delete photo {asset_id_1}: {str(e)}")
def _asset_resolution(asset):
    """Return the asset's EXIF size formatted as "<height> x <width>"."""
    # 'exifInfo' may be absent or explicitly None; treat both as empty.
    exif_info = asset.get('exifInfo') or {}
    height = exif_info.get('exifImageHeight', 'Unknown')
    width = exif_info.get('exifImageWidth', 'Unknown')
    return "{} x {}".format(height, width)


def findDuplicatesHash(assets, model):
    """Find and return duplicates based on file hash, correlating specific resolutions.

    Args:
        assets: Iterable of asset dicts. The keys read here are 'id',
            'thumbhash', 'isTrashed' and 'exifInfo'.
        model: 'dbhash' looks the hash up with ``getHashFromDb``; any other
            value (including 'thumbhash') uses the asset's 'thumbhash' field.

    Returns:
        A ``(duplicates, resolution_counts)`` tuple. ``duplicates`` is a list
        of ``(first_asset_with_hash, later_asset_with_same_hash)`` pairs.
        ``resolution_counts`` maps a "<height> x <width>" string to how often
        that resolution appeared on either side of a duplicate pair.
    """
    seen_hashes = {}
    duplicates = []
    resolution_counts = {}  # Track resolution correlations for the same hash

    include_trashed = st.session_state.get('is_trashed', False)

    for asset in assets:
        # Skip trashed assets unless the 'is_trashed' session flag is set
        if not include_trashed and asset.get('isTrashed', False):
            continue

        if model == 'dbhash':
            file_hash = getHashFromDb(asset.get('id'))
        else:
            file_hash = asset.get('thumbhash')

        # An asset without a hash cannot be compared. Without this guard all
        # hash-less assets would share one dict key and be reported as
        # duplicates of each other.
        if not file_hash:
            continue

        if file_hash not in seen_hashes:
            seen_hashes[file_hash] = asset
            continue

        # Add the current asset as a duplicate of the first one seen
        prev_asset = seen_hashes[file_hash]
        duplicates.append((prev_asset, asset))

        # Count the resolution of both members of the pair
        for resolution in (_asset_resolution(asset), _asset_resolution(prev_asset)):
            resolution_counts[resolution] = resolution_counts.get(resolution, 0) + 1

    return duplicates, resolution_counts