Hobbesgram — Migration / Index Rebuild

What this does:
Scans every file in data/files/ and rebuilds:
data/index/catalog.json (fast listing index — fixes home/browse timeouts)
data/index/search.json (full-text search index)

Safe to run multiple times. Does not modify any file metadata.

Run Migration Now

Delete this file from the server after use.

'; exit; }

// ── Output helper ──────────────────────────────────────────────────────────────
// Browser runs are served as plain text so progress can be streamed line by line.
if (!$is_cli) {
    header('Content-Type: text/plain; charset=utf-8');
    // Disable output buffering so progress streams to browser
    if (ob_get_level()) {
        ob_end_flush();
    }
}

/**
 * Print one line of output and, on non-CLI runs, flush it straight away.
 *
 * @param string $msg Text to print; a trailing newline is always appended.
 */
function out(string $msg = ''): void
{
    echo $msg . "\n";
    // IS_CLI is only defined for CLI runs (see below), so "not defined" means web.
    if (!defined('IS_CLI') || !IS_CLI) {
        @flush();
    }
}

if ($is_cli) {
    define('IS_CLI', true);
}

out('=== Hobbesgram Migration / Index Rebuild ===');
out('Started: ' . date('Y-m-d H:i:s'));
out();

// ── Step 1: Verify / create index directory ────────────────────────────────────
out('-- Step 1: Checking directories');

$index_dir = dirname(CATALOG_FILE);
if (!is_dir($index_dir)) {
    if (mkdir($index_dir, 0755, true)) {
        out(' Created: ' . $index_dir);
    } else {
        out(' ERROR: Could not create ' . $index_dir);
        exit(1);
    }
} else {
    out(' OK: ' . $index_dir);
}

if (!is_dir(FILES_DIR)) {
    out(' ERROR: FILES_DIR does not exist: ' . FILES_DIR);
    exit(1);
}
out();

// ── Step 2: Scan all file JSON records (single pass) ──────────────────────────
out('-- Step 2: Scanning data/files/');

// glob() returns false on error; treat that the same as "no records".
$all_files = glob(FILES_DIR . '/*.json') ?: [];
$total     = count($all_files);
out(" Found {$total} JSON record(s)");
out();

$catalog      = []; // id => lightweight record
$search_index = []; // keyword => [ids]
$counts       = [
    'approved'   => 0,
    'pending'    => 0,
    'skipped'    => 0,
    'total_size' => 0,
];

// Stop words (must match search_tokenize() in search.php)
$stop_words = ['the','a','an','and','or','in','of','to','for','is','it','at','as','by'];

// Metadata fields whose text feeds the search index.
$search_fields = [
    'title',
    'description',
    'author',
    'version',
    'tags',
    'category',
    'original_name',
    'uploader',
    'os2_version',
    'requirements',
    'license',
];

/**
 * Split free text into unique, lower-cased search keywords.
 *
 * Everything except a-z, 0-9 and whitespace is treated as a separator; words
 * shorter than three characters and stop words are dropped.
 *
 * @param string $text Raw text to tokenize.
 * @param array  $stop Stop words to exclude (compared strictly, lower-case).
 *
 * @return array Unique keywords as a sequential (0..n-1) list.
 */
function migrate_tokenize(string $text, array $stop): array
{
    $text  = strtolower($text);
    $text  = (string) preg_replace('/[^a-z0-9\s]/', ' ', $text);
    $words = preg_split('/\s+/', $text, -1, PREG_SPLIT_NO_EMPTY) ?: [];
    $words = array_filter($words, fn($w) => strlen($w) >= 3 && !in_array($w, $stop, true));

    // array_unique() preserves keys, so reindex AFTER deduplicating to get a real list.
    return array_values(array_unique($words));
}

$dot_every = max(1, (int)($total / 50)); // print a dot every 2%
$processed = 0;

if (!$is_cli) {
    out(' Progress (one dot per ~2%):');
}

foreach ($all_files as $json_path) {
    $m = storage_read($json_path);
    // Unreadable records and records without an id cannot be indexed.
    if (!$m || empty($m['id'])) {
        $counts['skipped']++;
        continue;
    }

    // ── Catalog entry ──────────────────────────────────────────────────────
    // Copy only the fields listed in CATALOG_FIELDS; absent ones become null.
    $entry = [];
    foreach (CATALOG_FIELDS as $k) {
        $entry[$k] = $m[$k] ?? null;
    }
    $catalog[$m['id']] = $entry;

    // ── Stats ──────────────────────────────────────────────────────────────
    if (!empty($m['approved'])) {
        $counts['approved']++;
        $counts['total_size'] += (int)($m['size'] ?? 0);

        // ── Search index (approved files only) ────────────────────────────
        $parts = [];
        foreach ($search_fields as $field) {
            $value = $m[$field] ?? '';
            // NOTE(review): fields are presumably strings, but a list-valued field
            // (e.g. tags stored as an array) would otherwise be stringified by
            // implode() as "Array" with a warning — flatten it instead.
            if (is_array($value)) {
                $value = implode(' ', array_filter($value, 'is_scalar'));
            }
            if (is_scalar($value) && (string) $value !== '') {
                $parts[] = (string) $value;
            }
        }
        $corpus = implode(' ', $parts);

        foreach (migrate_tokenize($corpus, $stop_words) as $kw) {
            $search_index[$kw][] = $m['id'];
        }
    } else {
        $counts['pending']++;
    }

    $processed++;
    if ($processed % $dot_every === 0) {
        echo '.';
        if (!$is_cli) {
            @flush();
        }
    }
}

// Deduplicate search index entries. Reindex after array_unique() so every id
// list stays sequential and is therefore encoded as a JSON array, not an object.
foreach ($search_index as &$ids) {
    $ids = array_values(array_unique($ids));
}
unset($ids);

out();
out();

// Set when an index file cannot be written, so the run can end with a failure status.
$had_error = false;

// ── Step 3: Write catalog ──────────────────────────────────────────────────────
out('-- Step 3: Writing catalog');
if (storage_write(CATALOG_FILE, $catalog, false)) {
    // The file was just rewritten; drop any cached stat data before measuring it.
    clearstatcache(true, CATALOG_FILE);
    $size = round(filesize(CATALOG_FILE) / 1024, 1);
    out(" OK: " . CATALOG_FILE . " ({$size} KB, " . count($catalog) . " entries)");
} else {
    $had_error = true;
    out(' ERROR: Could not write ' . CATALOG_FILE);
    out(' Check directory permissions on ' . dirname(CATALOG_FILE));
}
out();

// ── Step 4: Write search index ─────────────────────────────────────────────────
out('-- Step 4: Writing search index');
if (storage_write(SEARCH_FILE, $search_index)) {
    clearstatcache(true, SEARCH_FILE);
    $size = round(filesize(SEARCH_FILE) / 1024, 1);
    out(" OK: " . SEARCH_FILE . " ({$size} KB, " . count($search_index) . " keywords)");
} else {
    $had_error = true;
    out(' ERROR: Could not write ' . SEARCH_FILE);
}
out();

// ── Step 5: Report ─────────────────────────────────────────────────────────────
out('-- Results');
out(sprintf(' Total records scanned : %d', $total));
out(sprintf(' Approved (in catalog) : %d', $counts['approved']));
out(sprintf(' Pending : %d', $counts['pending']));
out(sprintf(' Skipped (bad/empty) : %d', $counts['skipped']));
out(sprintf(' Total approved size : %s', format_bytes($counts['total_size'])));
out(sprintf(' Search keywords : %d', count($search_index)));
out();
out('Finished: ' . date('Y-m-d H:i:s'));
out();
out('DELETE this file from the server: ' . basename(__FILE__));

// Mirror the Step 1 failures: a run that could not write an index ends non-zero.
if ($had_error) {
    exit(1);
}

// ── format_bytes helper (format_size may not be loaded) ───────────────────────
/**
 * Format a byte count as a human-readable size using 1024-based units.
 *
 * @param int $bytes Size in bytes.
 *
 * @return string Size in GB (2 decimals), MB or KB (1 decimal), or whole bytes.
 */
function format_bytes(int $bytes): string
{
    if ($bytes >= 1073741824) {
        return round($bytes / 1073741824, 2) . ' GB';
    }
    if ($bytes >= 1048576) {
        return round($bytes / 1048576, 1) . ' MB';
    }
    if ($bytes >= 1024) {
        return round($bytes / 1024, 1) . ' KB';
    }
    return $bytes . ' B';
}