<?php
/**
* hobbesgram-migrate.php — Data migration & index rebuild script
*
* Rebuilds data/index/catalog.json and data/index/search.json in a single
* pass through data/files/, writing each index file exactly once.
* Run this after upgrading to a version that adds the file catalog, or after
* any large SFTP/FTP bulk import.
*
* CLI: php hobbesgram-migrate.php
* Web: https://yoursite.com/hobbesgram-migrate.php?run=yes
*
* DELETE THIS FILE from the server after running.
*/
// ── Bootstrap ──────────────────────────────────────────────────────────────────
// Entry-point flag defined before the includes are loaded.
// NOTE(review): presumably checked by the included files as a direct-access guard — confirm there.
define('HOBBES', true);

// Project configuration and helpers; later code uses constants and functions
// such as CATALOG_FILE, FILES_DIR, storage_read() and storage_write() that are
// not defined in this file.
require_once __DIR__ . '/config.php';
require_once __DIR__ . '/includes/storage.php';
require_once __DIR__ . '/includes/search.php';

// A full rebuild can be slow and memory-hungry, so raise both limits.
ini_set('memory_limit', '256M');
set_time_limit(600);

// True when started from the command line rather than through a web server.
$is_cli = (PHP_SAPI === 'cli');
// ── Web guard ──────────────────────────────────────────────────────────────────
// Web requests must opt in with ?run=yes. Anything else gets a static
// confirmation page and the script stops here. CLI runs skip this entirely.
if (!($is_cli || ($_GET['run'] ?? '') === 'yes')) {
    header('Content-Type: text/html; charset=utf-8');
    echo <<<'HTML'
<!DOCTYPE html><html lang="en"><head><meta charset="UTF-8">
<title>Hobbesgram Migration</title>
<style>
body{font-family:monospace;font-size:14px;max-width:700px;margin:40px auto;padding:0 20px;}
h2{border-bottom:2px solid #000;padding-bottom:6px;}
.warn{background:#fffbe6;border:1px solid #cca800;padding:10px 14px;margin:16px 0;}
.btn{display:inline-block;background:#000080;color:#fff;padding:8px 20px;text-decoration:none;font-weight:bold;}
</style></head><body>
<h2>Hobbesgram — Migration / Index Rebuild</h2>
<div class="warn">
<strong>What this does:</strong><br>
Scans every file in <code>data/files/</code> and rebuilds:<br>
• <code>data/index/catalog.json</code> (fast listing index — fixes home/browse timeouts)<br>
• <code>data/index/search.json</code> (full-text search index)<br><br>
Safe to run multiple times. Does <strong>not</strong> modify any file metadata.
</div>
<p><a href="?run=yes" class="btn">Run Migration Now</a></p>
<p style="color:#666;font-size:12px;">Delete this file from the server after use.</p>
</body></html>
HTML;
    exit;
}
// ── Output helper ──────────────────────────────────────────────────────────────
// Web runs report progress as plain text that should reach the browser as it
// is produced.
if (!$is_cli) {
    header('Content-Type: text/plain; charset=utf-8');
    // Close every active output buffer, not only the innermost one: any
    // buffer left open would keep holding progress output back until the
    // script ends.
    while (ob_get_level() > 0) {
        // ob_end_flush() returns false when the buffer cannot be removed;
        // stop in that case rather than looping forever.
        if (!@ob_end_flush()) {
            break;
        }
    }
}
/**
 * Print one line of output.
 *
 * Writes $msg followed by a newline. Unless the IS_CLI constant is defined
 * and truthy, flush() is then called (with errors suppressed) so the line is
 * pushed towards the client immediately.
 *
 * @param string $msg Text to print; defaults to an empty line.
 */
function out(string $msg = ''): void {
    echo $msg, "\n";

    if (defined('IS_CLI') && IS_CLI) {
        return;
    }
    @flush();
}
// out() consults IS_CLI to decide whether to flush; it is only defined for CLI runs.
if ($is_cli) {
    define('IS_CLI', true);
}

out('=== Hobbesgram Migration / Index Rebuild ===');
out(sprintf('Started: %s', date('Y-m-d H:i:s')));
out();

// ── Step 1: Verify / create index directory ────────────────────────────────────
out('-- Step 1: Checking directories');

// The index directory is wherever the catalog file lives; create it if missing.
$index_dir = dirname(CATALOG_FILE);
if (is_dir($index_dir)) {
    out(' OK: ' . $index_dir);
} elseif (mkdir($index_dir, 0755, true)) {
    out(' Created: ' . $index_dir);
} else {
    out(' ERROR: Could not create ' . $index_dir);
    exit(1);
}

// Without the records directory there is nothing to scan, so abort.
if (!is_dir(FILES_DIR)) {
    out(' ERROR: FILES_DIR does not exist: ' . FILES_DIR);
    exit(1);
}
out();
// ── Step 2: Scan all file JSON records (single pass) ─────────────────────────
out('-- Step 2: Scanning data/files/');

// glob() yields false on error; treat that the same as "no records".
$all_files = glob(FILES_DIR . '/*.json');
if (!$all_files) {
    $all_files = [];
}
$total = count($all_files);
out(' Found ' . $total . ' JSON record(s)');
out();

// Accumulators filled while scanning:
//   $catalog      maps record id => lightweight catalog entry
//   $search_index maps keyword   => list of record ids
$catalog = [];
$search_index = [];
$counts = array_fill_keys(['approved', 'pending', 'skipped', 'total_size'], 0);

// Stop words (must match search_tokenize() in search.php)
$stop_words = [
    'the', 'a', 'an', 'and', 'or', 'in', 'of',
    'to', 'for', 'is', 'it', 'at', 'as', 'by',
];
/**
 * Split free text into a list of unique, lower-cased search keywords.
 *
 * The text is lower-cased, every character other than a-z, 0-9 and whitespace
 * is replaced by a space, and the result is split on whitespace. Words shorter
 * than three characters and words present in $stop are discarded.
 *
 * @param string   $text Text to tokenize.
 * @param string[] $stop Words to exclude; compared strictly against the
 *                       lower-cased tokens.
 * @return string[] Unique keywords in first-seen order, indexed 0..n-1.
 */
function migrate_tokenize(string $text, array $stop): array {
    // preg_replace()/preg_split() signal failure with null/false; fall back
    // to "no text" / "no words" instead of passing those values on.
    $normalized = preg_replace('/[^a-z0-9\s]/', ' ', strtolower($text)) ?? '';
    $words = preg_split('/\s+/', $normalized, -1, PREG_SPLIT_NO_EMPTY) ?: [];

    $keywords = array_filter(
        $words,
        static fn(string $word): bool => strlen($word) >= 3 && !in_array($word, $stop, true)
    );

    // array_unique() keeps the keys of the values it retains, so re-index
    // last; doing it the other way round leaves gaps in the returned keys.
    return array_values(array_unique($keywords));
}
// Emit a progress dot every 2% of the records (at least every record).
$dot_every = max(1, intdiv($total, 50));
$processed = 0;
if (!$is_cli) {
    out(' Progress (one dot per ~2%):');
}

// Record fields whose text is concatenated, in this order, to build the
// searchable corpus of an approved record.
// NOTE(review): the values are joined with implode(), so this assumes each
// field is a scalar; an array-valued field would not be indexed properly —
// confirm against the record schema.
$corpus_fields = [
    'title',
    'description',
    'author',
    'version',
    'tags',
    'category',
    'original_name',
    'uploader',
    'os2_version',
    'requirements',
    'license',
];

foreach ($all_files as $record_path) {
    $record = storage_read($record_path);

    // Unreadable records and records without an id are counted and ignored.
    if (!$record || empty($record['id'])) {
        $counts['skipped']++;
        continue;
    }
    $record_id = $record['id'];

    // ── Catalog entry: every record with an id, reduced to CATALOG_FIELDS ──
    $entry = [];
    foreach (CATALOG_FIELDS as $field) {
        $entry[$field] = $record[$field] ?? null;
    }
    $catalog[$record_id] = $entry;

    // ── Stats + search index (only approved records are searchable) ────────
    if (empty($record['approved'])) {
        $counts['pending']++;
    } else {
        $counts['approved']++;
        $counts['total_size'] += (int)($record['size'] ?? 0);

        $field_values = [];
        foreach ($corpus_fields as $field) {
            $field_values[] = $record[$field] ?? '';
        }
        $corpus = implode(' ', array_filter($field_values));

        foreach (migrate_tokenize($corpus, $stop_words) as $keyword) {
            $search_index[$keyword][] = $record_id;
        }
    }

    // Skipped records never reach this point, so they do not advance the dots.
    $processed++;
    if ($processed % $dot_every === 0) {
        echo '.';
        if (!$is_cli) {
            @flush();
        }
    }
}
// Deduplicate each keyword's id list (the same id can be appended twice when
// two record files carry the same id) and re-index it afterwards.
// array_unique() preserves keys, so array_values() has to run last: otherwise
// a removed duplicate leaves a gap in the keys, and a JSON encoder such as
// json_encode() writes a non-sequential array as an object instead of a list.
foreach ($search_index as $keyword => $ids) {
    $search_index[$keyword] = array_values(array_unique($ids));
}
// Terminate the progress-dot line and leave a blank line before the next step.
out();
out();

// Set when either index file cannot be written, so the script can finish with
// a non-zero exit status (matching the exit(1) used for Step 1 errors).
$write_failed = false;

// ── Step 3: Write catalog ──────────────────────────────────────────────────────
out('-- Step 3: Writing catalog');
// NOTE(review): the meaning of the third storage_write() argument is defined
// in includes/storage.php and is not visible here; it is passed through unchanged.
if (storage_write(CATALOG_FILE, $catalog, false)) {
    // Drop any cached stat data so filesize() reports the file just written.
    clearstatcache(true, CATALOG_FILE);
    $size = round(filesize(CATALOG_FILE) / 1024, 1);
    out(" OK: " . CATALOG_FILE . " ({$size} KB, " . count($catalog) . " entries)");
} else {
    $write_failed = true;
    out(' ERROR: Could not write ' . CATALOG_FILE);
    out(' Check directory permissions on ' . dirname(CATALOG_FILE));
}
out();

// ── Step 4: Write search index ─────────────────────────────────────────────────
out('-- Step 4: Writing search index');
if (storage_write(SEARCH_FILE, $search_index)) {
    clearstatcache(true, SEARCH_FILE);
    $size = round(filesize(SEARCH_FILE) / 1024, 1);
    out(" OK: " . SEARCH_FILE . " ({$size} KB, " . count($search_index) . " keywords)");
} else {
    $write_failed = true;
    out(' ERROR: Could not write ' . SEARCH_FILE);
}
out();

// ── Step 5: Report ─────────────────────────────────────────────────────────────
out('-- Results');
out(sprintf(' Total records scanned : %d', $total));
out(sprintf(' Approved (in catalog) : %d', $counts['approved']));
out(sprintf(' Pending : %d', $counts['pending']));
out(sprintf(' Skipped (bad/empty) : %d', $counts['skipped']));
out(sprintf(' Total approved size : %s', format_bytes($counts['total_size'])));
out(sprintf(' Search keywords : %d', count($search_index)));
out();
out('Finished: ' . date('Y-m-d H:i:s'));
out();
out('DELETE this file from the server: ' . basename(__FILE__));

// Report the failure to the caller (shell, cron, deploy script) only after
// the full summary has been printed.
if ($write_failed) {
    exit(1);
}
// ── format_bytes helper (format_size may not be loaded) ───────────────────────
/**
 * Render a byte count as a short human-readable string.
 *
 * Uses binary units: values of at least 1 GiB are shown in GB with two
 * decimals, at least 1 MiB in MB and at least 1 KiB in KB with one decimal;
 * anything smaller is shown as a plain byte count.
 *
 * @param int $bytes Size in bytes.
 * @return string Formatted size, e.g. "1.5 KB" or "512 B".
 */
function format_bytes(int $bytes): string {
    // Largest unit first: [label, bytes per unit, decimals to keep].
    $units = [
        ['GB', 1073741824, 2],
        ['MB', 1048576, 1],
        ['KB', 1024, 1],
    ];

    foreach ($units as [$label, $divisor, $decimals]) {
        if ($bytes >= $divisor) {
            return round($bytes / $divisor, $decimals) . ' ' . $label;
        }
    }

    return $bytes . ' B';
}