# Advanced Topics
This section covers advanced configuration, optimization techniques, and troubleshooting for Laravel Ingest.
## Performance Optimization for Large Imports
Choosing the right transaction mode and chunk size is crucial for performance when handling large datasets.
### Transaction Mode Recommendations
### Chunk Size Optimization
// Simple inserts need little memory per row, so larger chunks are safe
->setChunkSize(1000) // Default: 100
// Complex transformations hold more memory per row — keep chunks small
->setChunkSize(50)
// Maximum throughput for very large datasets (trades away per-chunk rollback)
->setChunkSize(2000)
->transactionMode(TransactionMode::NONE)
### Queue Worker Optimization
# For large imports with more memory
php artisan queue:work --memory=1024 --timeout=600
# For fast processing of small chunks
php artisan queue:work --memory=256 --timeout=60
### Database Optimization
// Temporarily disable index checks for maximum insert speed.
// NOTE: beforeRow()/afterRow() are invoked for every row — if the package
// offers one-time before-import/after-import hooks, prefer those for this
// kind of setup/teardown.
->beforeRow(function(array &$row) {
// Runs before each row is inserted
DB::statement('SET UNIQUE_CHECKS=0');
DB::statement('SET FOREIGN_KEY_CHECKS=0');
})
->afterRow(function($model, array $row) {
// Runs after each row is inserted
DB::statement('SET UNIQUE_CHECKS=1');
DB::statement('SET FOREIGN_KEY_CHECKS=1');
})
## Memory Management
### Strategies for Large Files
- Use URL sources instead of uploads for very large files
- Reduce chunk size when encountering memory limit errors
- Disable row logging for very large imports:
// config/ingest.php
'log_rows' => false,
### PHP Configuration
; php.ini
memory_limit = 1G
max_execution_time = 300
upload_max_filesize = 100M
post_max_size = 100M
## Error Handling and Recovery
### Retry Strategies
// Prevent avoidable errors up front by validating and cleaning data before insert
->validate([
'email' => 'required|email',
'phone' => 'nullable|regex:/^\+?[1-9]\d{1,14}$/',
])
// Clean data in beforeRow()
->beforeRow(function(array &$row) {
$row['email'] = strtolower(trim($row['email'] ?? ''));
$row['phone'] = preg_replace('/[^\d+]/', '', $row['phone'] ?? '');
})
### Error Thresholds
// Configuration for maximum error count
// config/ingest.php
'max_error_percentage' => 10, // 10% errors allowed
'max_absolute_errors' => 1000, // Max 1000 erroneous rows
## Monitoring and Debugging
### Detailed Logging Strategy
// In your importer class
public function getConfig(): IngestConfig
{
return IngestConfig::for(Product::class)
->afterRow(function(Product $product, array $row) {
// Detailed logging for debugging
if (app()->environment('local')) {
Log::debug('Imported product', [
'id' => $product->id,
'source_data' => $row,
'memory_usage' => memory_get_usage(true),
]);
}
});
}
### Performance Metrics
// Monitor import performance
->beforeRow(function(array &$row) {
if (!isset($GLOBALS['import_start_time'])) {
$GLOBALS['import_start_time'] = microtime(true);
$GLOBALS['import_row_count'] = 0;
}
$GLOBALS['import_row_count']++;
})
->afterRow(function($model, array $row) {
if ($GLOBALS['import_row_count'] % 1000 === 0) {
$elapsed = microtime(true) - $GLOBALS['import_start_time'];
$rows_per_sec = $GLOBALS['import_row_count'] / $elapsed;
Log::info("Import speed: {$rows_per_sec} rows/sec");
}
})
## Advanced Scenarios
### Multi-Model Imports
// Import heterogeneous data into different tables
->resolveModelUsing(function(array $row) {
return match($row['record_type']) {
'user' => User::class,
'admin' => AdminUser::class,
'customer' => Customer::class,
default => throw new InvalidArgumentException("Unknown record type: {$row['record_type']}")
};
})
### Conditional Validation
// Dynamic validation rules based on data
->validate(function(array $row) {
$rules = [
'email' => 'required|email',
];
if (($row['user_type'] ?? null) === 'premium') {
$rules['subscription_end'] = 'required|date|after:today';
}
return $rules;
})
### External API Integration
// Enrich data during import with external APIs
->mapAndTransform('postal_code', 'city', function($postalCode, $row) {
$response = Http::get("https://api.postal.com/{$postalCode}");
return $response->json('city') ?? 'Unknown';
})