diff --git a/CHANGELOG.md b/CHANGELOG.md index b62baeb..e9b11fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,11 +3,17 @@ ## Changes 10/4/2025 v1.3.13 fix(scanner): resolve dirs via CLI/env/constants; write per-item JSON; skip trash +fix(scanner): rebuild per-folder metadata to match File/Folder models - scan_uploads.php now falls back to UPLOAD_DIR/META_DIR from config.php - prevents double slashes in metadata paths; respects app timezone -- skips trash/profile_pics and symlinks; writes JSON only when missing - unblocks SCAN_ON_START so externally added files are indexed at boot +- Writes per-folder metadata files (root_metadata.json / folder_metadata.json) using the same naming rule as the models +- Adds missing entries for files (uploaded, modified using DATE_TIME_FORMAT, uploader=Imported) +- Prunes stale entries for files that no longer exist +- Skips uploads/trash and symlinks +- Resolves paths from CLI flags, env vars, or config constants (UPLOAD_DIR/META_DIR) +- Idempotent; safe to run at startup via SCAN_ON_START ## Changes 10/4/2025 v1.3.12 diff --git a/scripts/scan_uploads.php b/scripts/scan_uploads.php index 05204be..5e6ee85 100644 --- a/scripts/scan_uploads.php +++ b/scripts/scan_uploads.php @@ -1,95 +1,142 @@ root_metadata.json + * "" -> str_replace(['/', '\\', ' '], '-', '') . '_metadata.json' */ require_once __DIR__ . '/../config/config.php'; -// Resolve directories from CLI opts, env, or config.php constants (in that order). -$opt = getopt('', ['upload-dir::','metadata-dir::']) ?: []; -$uploadDir = $opt['upload-dir'] ?? getenv('upload_dir') ?? getenv('UPLOAD_DIR') ?? (defined('UPLOAD_DIR') ? UPLOAD_DIR : null); -$metadataDir = $opt['metadata-dir'] ?? getenv('metadata_dir') ?? getenv('META_DIR') ?? (defined('META_DIR') ? META_DIR : null); +// ---------- helpers that mirror model behavior ---------- -if (!$uploadDir || !$metadataDir) { - fwrite(STDERR, "Missing configuration for upload_dir or metadata_dir\n"); - exit(1); +/** Compute the metadata JSON path for a folder key (e.g., "root", "invoices/2025"). */ +function folder_metadata_path(string $folderKey): string { + if (strtolower(trim($folderKey)) === 'root' || trim($folderKey) === '') { + return rtrim(META_DIR, '/\\') . '/root_metadata.json'; + } + $safe = str_replace(['/', '\\', ' '], '-', trim($folderKey)); + return rtrim(META_DIR, '/\\') . '/' . $safe . '_metadata.json'; } -// Normalize with exactly one trailing slash -$uploadDir = rtrim($uploadDir, '/\\') . '/'; -$metadataDir = rtrim($metadataDir, '/\\') . '/'; +/** Turn an absolute path under UPLOAD_DIR into a folder key (“root” or relative with slashes). */ +function to_folder_key(string $absPath): string { + $base = rtrim(UPLOAD_DIR, '/\\') . DIRECTORY_SEPARATOR; + if (realpath($absPath) === realpath(rtrim(UPLOAD_DIR, '/\\'))) { + return 'root'; + } + $rel = ltrim(str_replace('\\', '/', substr($absPath, strlen($base))), '/'); + return $rel; +} -// Respect the app-wide timezone already set in config.php (do NOT force UTC here) - -/** - * Recursively list files and folders under $dir. - * Skips symlinks and internal folders we don't want to index. - */ -function scanDirectory(string $dir): array { +/** List immediate files in a directory (no subdirs). */ +function list_files(string $dir): array { + $out = []; $entries = @scandir($dir); - if ($entries === false) return []; - - $results = []; + if ($entries === false) return $out; foreach ($entries as $name) { if ($name === '.' || $name === '..') continue; - $path = $dir . $name; - - // Skip symlinks to avoid loops - if (is_link($path)) continue; - - // Recurse into directories - if (is_dir($path)) { - $results[] = $path . '/'; - $results = array_merge($results, scanDirectory($path . '/')); - } else { - $results[] = $path; - } + $p = $dir . DIRECTORY_SEPARATOR . $name; + if (is_file($p)) $out[] = $name; } - return $results; + sort($out, SORT_NATURAL | SORT_FLAG_CASE); + return $out; } -/** - * Build the metadata JSON path parallel to uploads/ for a given item. - */ -function metadataPath(string $itemPath, string $uploadDir, string $metadataDir): string { - $relative = ltrim(str_replace($uploadDir, '', $itemPath), '/\\'); - return $metadataDir . $relative . '.json'; -} - -$allItems = scanDirectory($uploadDir); - -foreach ($allItems as $item) { - // Derive a relative path (used in metadata and for skip rules) - $relative = ltrim(str_replace($uploadDir, '', $item), '/\\'); - - // Skip some internal areas under uploads/ - if (strpos($relative, 'trash/') === 0 || strpos($relative, 'profile_pics/') === 0) { - continue; - } - - $metaPath = metadataPath($item, $uploadDir, $metadataDir); - - if (!file_exists($metaPath)) { - $isDir = is_dir($item); - $metadata = [ - 'path' => rtrim($relative, '/'), - 'type' => $isDir ? 'folder' : 'file', - 'size' => (!$isDir && is_file($item)) ? (int)filesize($item) : 0, - 'user' => 'Imported', - 'uploadDate' => date('c'), - ]; - - // Ensure parent directory exists with sane perms (umask from start.sh handles final modes) - $parent = dirname($metaPath); - if (!is_dir($parent)) { - @mkdir($parent, 0775, true); - } - - if (@file_put_contents($metaPath, json_encode($metadata, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES)) === false) { - fwrite(STDERR, "Failed to write metadata: {$metaPath}\n"); - } else { - echo "Created metadata for: {$relative}\n"; +/** Recursively list subfolders (relative folder keys), skipping trash/. */ +function list_all_folders(string $root): array { + $root = rtrim($root, '/\\'); + $folders = ['root']; + $it = new RecursiveIteratorIterator( + new RecursiveDirectoryIterator($root, FilesystemIterator::SKIP_DOTS), + RecursiveIteratorIterator::SELF_FIRST + ); + foreach ($it as $path => $info) { + if ($info->isDir()) { + // relative key like "foo/bar" + $rel = ltrim(str_replace(['\\'], '/', substr($path, strlen($root) + 1)), '/'); + if ($rel === '') continue; + // skip trash subtree + if (strpos($rel, 'trash/') === 0 || $rel === 'trash') continue; + // obey the app’s folder-name regex to stay consistent + if (preg_match(REGEX_FOLDER_NAME, basename($rel))) { + $folders[] = $rel; + } } } + // de-dup and sort + $folders = array_values(array_unique($folders)); + sort($folders, SORT_NATURAL | SORT_FLAG_CASE); + return $folders; } + +// ---------- main ---------- + +$uploads = rtrim(UPLOAD_DIR, '/\\'); +$metaDir = rtrim(META_DIR, '/\\'); + +// Ensure metadata dir exists +if (!is_dir($metaDir)) { + @mkdir($metaDir, 0775, true); +} + +$now = date(DATE_TIME_FORMAT); +$folders = list_all_folders($uploads); + +$totalCreated = 0; +$totalPruned = 0; + +foreach ($folders as $folderKey) { + $absFolder = ($folderKey === 'root') + ? $uploads + : $uploads . DIRECTORY_SEPARATOR . str_replace('/', DIRECTORY_SEPARATOR, $folderKey); + + if (!is_dir($absFolder)) continue; + + $files = list_files($absFolder); + + $metaPath = folder_metadata_path($folderKey); + $metadata = []; + if (is_file($metaPath)) { + $decoded = json_decode(@file_get_contents($metaPath), true); + if (is_array($decoded)) $metadata = $decoded; + } + + // Build a quick lookup of existing entries + $existing = array_keys($metadata); + + // ADD missing files + foreach ($files as $name) { + // Keep same filename validation used in FileModel + if (!preg_match(REGEX_FILE_NAME, $name)) continue; + + if (!isset($metadata[$name])) { + $metadata[$name] = [ + 'uploaded' => $now, + 'modified' => $now, + 'uploader' => 'Imported' + ]; + $totalCreated++; + echo "Indexed: " . ($folderKey === 'root' ? '' : $folderKey . '/') . $name . PHP_EOL; + } + } + + // PRUNE stale metadata entries for files that no longer exist + foreach ($existing as $name) { + if (!in_array($name, $files, true)) { + unset($metadata[$name]); + $totalPruned++; + } + } + + // Ensure parent dir exists and write metadata + @mkdir(dirname($metaPath), 0775, true); + if (@file_put_contents($metaPath, json_encode($metadata, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES)) === false) { + fwrite(STDERR, "Failed to write metadata for folder: {$folderKey}\n"); + } +} + +echo "Done. Created {$totalCreated} entr" . ($totalCreated === 1 ? "y" : "ies") . + ", pruned {$totalPruned}.\n";