using Bit.Seeder.Migration.Models; using Bit.Seeder.Migration.Utils; using Microsoft.Data.SqlClient; using Microsoft.Extensions.Logging; namespace Bit.Seeder.Migration.Databases; /// /// SQL Server database exporter that handles schema discovery and data export. /// public class SqlServerExporter(DatabaseConfig config, ILogger logger) : IDisposable { private readonly ILogger _logger = logger; private readonly string _host = config.Host; private readonly int _port = config.Port; private readonly string _database = config.Database; private readonly string _username = config.Username; private readonly string _password = config.Password; private SqlConnection? _connection; private bool _disposed = false; /// /// Connects to the SQL Server database. /// public bool Connect() { try { var safeConnectionString = $"Server={_host},{_port};Database={_database};" + $"User Id={_username};Password={DbSeederConstants.REDACTED_PASSWORD};" + $"TrustServerCertificate=True;" + $"Connection Timeout={DbSeederConstants.DEFAULT_CONNECTION_TIMEOUT};"; var actualConnectionString = safeConnectionString.Replace(DbSeederConstants.REDACTED_PASSWORD, _password); _connection = new SqlConnection(actualConnectionString); _connection.Open(); _logger.LogInformation("Connected to SQL Server: {Host}/{Database}", _host, _database); return true; } catch (Exception ex) { _logger.LogError("Failed to connect to SQL Server: {Message}", ex.Message); return false; } } /// /// Disconnects from the SQL Server database. /// public void Disconnect() { if (_connection != null) { _connection.Close(); _connection.Dispose(); _connection = null; _logger.LogInformation("Disconnected from SQL Server"); } } /// /// Discovers all tables in the SQL Server database. /// /// Whether to exclude system tables /// List of table names public List DiscoverTables(bool excludeSystemTables = true) { if (_connection == null) throw new InvalidOperationException("Not connected to database"); try { var query = @" SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'BASE TABLE'"; if (excludeSystemTables) { query += @" AND TABLE_SCHEMA = 'dbo' AND TABLE_NAME NOT IN ('sysdiagrams', '__EFMigrationsHistory')"; } query += " ORDER BY TABLE_NAME"; using var command = new SqlCommand(query, _connection); using var reader = command.ExecuteReader(); var tables = new List(); while (reader.Read()) { var tableName = reader.GetString(0); // Validate table name immediately to prevent second-order SQL injection IdentifierValidator.ValidateOrThrow(tableName, "table name"); tables.Add(tableName); } _logger.LogInformation("Discovered {Count} tables: {Tables}", tables.Count, string.Join(", ", tables)); return tables; } catch (Exception ex) { _logger.LogError("Error discovering tables: {Message}", ex.Message); throw; } } /// /// Gets detailed information about a table including columns, types, and row count. /// /// The name of the table to query /// TableInfo containing schema and metadata public TableInfo GetTableInfo(string tableName) { if (_connection == null) throw new InvalidOperationException("Not connected to database"); IdentifierValidator.ValidateOrThrow(tableName, "table name"); try { // Get column information var columnQuery = @" SELECT COLUMN_NAME, DATA_TYPE, IS_NULLABLE, CHARACTER_MAXIMUM_LENGTH, NUMERIC_PRECISION, NUMERIC_SCALE FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = @TableName ORDER BY ORDINAL_POSITION"; var columns = new List(); var columnTypes = new Dictionary(); using (var command = new SqlCommand(columnQuery, _connection)) { command.Parameters.AddWithValue("@TableName", tableName); using var reader = command.ExecuteReader(); while (reader.Read()) { var colName = reader.GetString(0); // Validate column name immediately to prevent second-order SQL injection IdentifierValidator.ValidateOrThrow(colName, "column name"); var dataType = reader.GetString(1); var isNullable = reader.GetString(2); var maxLength = reader.IsDBNull(3) ? (int?)null : reader.GetInt32(3); var precision = reader.IsDBNull(4) ? (byte?)null : reader.GetByte(4); var scale = reader.IsDBNull(5) ? (int?)null : reader.GetInt32(5); columns.Add(colName); // Build type description var typeDesc = dataType.ToUpper(); if (maxLength.HasValue && dataType.ToLower() is "varchar" or "nvarchar" or "char" or "nchar") { typeDesc += $"({maxLength})"; } else if (precision.HasValue && dataType.ToLower() is "decimal" or "numeric") { typeDesc += $"({precision},{scale})"; } typeDesc += isNullable == "YES" ? " NULL" : " NOT NULL"; columnTypes[colName] = typeDesc; } } if (columns.Count == 0) { throw new InvalidOperationException($"Table '{tableName}' not found"); } var countQuery = $"SELECT COUNT(*) FROM [{tableName}]"; int rowCount; using (var command = new SqlCommand(countQuery, _connection)) { rowCount = command.GetScalarValue(0, _logger, $"row count for {tableName}"); } _logger.LogInformation("Table {TableName}: {ColumnCount} columns, {RowCount} rows", tableName, columns.Count, rowCount); return new TableInfo { Name = tableName, Columns = columns, ColumnTypes = columnTypes, RowCount = rowCount }; } catch (Exception ex) { _logger.LogError("Error getting table info for {TableName}: {Message}", tableName, ex.Message); throw; } } /// /// Exports all data from a table with streaming to avoid memory exhaustion. /// /// The name of the table to export /// Batch size for progress reporting /// Tuple of column names and data rows public (List Columns, List Data) ExportTableData( string tableName, int batchSize = DbSeederConstants.PROGRESS_REPORTING_INTERVAL) { if (_connection == null) throw new InvalidOperationException("Not connected to database"); IdentifierValidator.ValidateOrThrow(tableName, "table name"); try { // Get table info first var tableInfo = GetTableInfo(tableName); // Validate all column names foreach (var colName in tableInfo.Columns) { IdentifierValidator.ValidateOrThrow(colName, "column name"); } // Build column list with proper quoting var quotedColumns = tableInfo.Columns.Select(col => $"[{col}]").ToList(); var columnList = string.Join(", ", quotedColumns); // Execute query var query = $"SELECT {columnList} FROM [{tableName}]"; _logger.LogInformation("Executing export query for {TableName}", tableName); using var command = new SqlCommand(query, _connection); command.CommandTimeout = DbSeederConstants.LARGE_BATCH_COMMAND_TIMEOUT; using var reader = command.ExecuteReader(); // Identify GUID columns for in-place uppercase conversion var guidColumnIndices = IdentifyGuidColumns(tableInfo); // Fetch data in batches - still loads into memory but with progress reporting // Note: For true streaming, consumers should use yield return pattern var allData = new List(); while (reader.Read()) { var row = new object[tableInfo.Columns.Count]; reader.GetValues(row); // Convert GUID values in-place to uppercase for Bitwarden compatibility ConvertGuidsToUppercaseInPlace(row, guidColumnIndices); allData.Add(row); if (allData.Count % batchSize == 0) { _logger.LogDebug("Fetched {Count} rows from {TableName}", allData.Count, tableName); } } _logger.LogInformation("Exported {Count} rows from {TableName}", allData.Count, tableName); return (tableInfo.Columns, allData); } catch (Exception ex) { _logger.LogError("Error exporting data from {TableName}: {Message}", tableName, ex.Message); throw; } } /// /// Identifies columns that likely contain JSON data by sampling values. /// /// The name of the table to analyze /// Number of rows to sample for analysis /// List of column names that appear to contain JSON public List IdentifyJsonColumns( string tableName, int sampleSize = DbSeederConstants.DEFAULT_SAMPLE_SIZE) { if (_connection == null) throw new InvalidOperationException("Not connected to database"); IdentifierValidator.ValidateOrThrow(tableName, "table name"); try { var tableInfo = GetTableInfo(tableName); var jsonColumns = new List(); // Only check varchar/text columns var textColumns = tableInfo.ColumnTypes .Where(kv => kv.Value.ToLower().Contains("varchar") || kv.Value.ToLower().Contains("text") || kv.Value.ToLower().Contains("nvarchar")) .Select(kv => kv.Key) .ToList(); if (textColumns.Count == 0) return jsonColumns; // Validate all column names foreach (var colName in textColumns) { IdentifierValidator.ValidateOrThrow(colName, "column name"); } // Sample data from text columns var quotedColumns = textColumns.Select(col => $"[{col}]").ToList(); var columnList = string.Join(", ", quotedColumns); var whereClause = string.Join(" OR ", textColumns.Select(col => $"[{col}] IS NOT NULL")); var query = $@" SELECT TOP {sampleSize} {columnList} FROM [{tableName}] WHERE {whereClause}"; using var command = new SqlCommand(query, _connection); using var reader = command.ExecuteReader(); var sampleData = new List(); while (reader.Read()) { var row = new object[textColumns.Count]; reader.GetValues(row); sampleData.Add(row); } // Analyze each column // JSON detection threshold: 50% of samples must look like JSON const double jsonThreshold = 0.5; for (int i = 0; i < textColumns.Count; i++) { var colName = textColumns[i]; var jsonIndicators = 0; var totalNonNull = 0; foreach (var row in sampleData) { if (i < row.Length && row[i] != DBNull.Value) { totalNonNull++; var value = row[i]?.ToString()?.Trim() ?? string.Empty; // Check for JSON indicators if ((value.StartsWith("{") && value.EndsWith("}")) || (value.StartsWith("[") && value.EndsWith("]"))) { jsonIndicators++; } } } // If more than threshold of non-null values look like JSON, mark as JSON column if (totalNonNull > 0 && (double)jsonIndicators / totalNonNull > jsonThreshold) { jsonColumns.Add(colName); _logger.LogInformation("Identified {ColumnName} as likely JSON column ({JsonIndicators}/{TotalNonNull} samples)", colName, jsonIndicators, totalNonNull); } } return jsonColumns; } catch (Exception ex) { _logger.LogError("Error identifying JSON columns in {TableName}: {Message}", tableName, ex.Message); return []; } } /// /// Identifies GUID column indices in a table for efficient processing. /// /// Table metadata including column types /// List of column indices that are GUID/uniqueidentifier columns private List IdentifyGuidColumns(TableInfo tableInfo) { var guidColumnIndices = new List(); for (int i = 0; i < tableInfo.Columns.Count; i++) { var columnName = tableInfo.Columns[i]; if (tableInfo.ColumnTypes.TryGetValue(columnName, out var columnType)) { if (columnType.ToUpper().Contains("UNIQUEIDENTIFIER")) { guidColumnIndices.Add(i); _logger.LogDebug("Found GUID column '{ColumnName}' at index {Index}", columnName, i); } } } if (guidColumnIndices.Count > 0) { _logger.LogInformation("Converting {Count} GUID column(s) to uppercase", guidColumnIndices.Count); } return guidColumnIndices; } /// /// Converts GUID values to uppercase in-place within a data row. /// More efficient than creating new arrays as it modifies the array directly. /// /// The data row to modify /// Indices of columns containing GUIDs private void ConvertGuidsToUppercaseInPlace(object[] row, List guidColumnIndices) { foreach (var guidIdx in guidColumnIndices) { if (guidIdx < row.Length && row[guidIdx] != null && row[guidIdx] != DBNull.Value) { var guidValue = row[guidIdx].ToString(); // Convert to uppercase in-place, preserving the GUID format row[guidIdx] = guidValue?.ToUpper() ?? string.Empty; } } } /// /// Tests the connection to SQL Server by executing a simple query. /// public bool TestConnection() { try { if (Connect()) { using var command = new SqlCommand("SELECT 1", _connection); // Use null-safe scalar value retrieval var result = command.GetScalarValue(0, _logger, "connection test"); Disconnect(); return result == 1; } return false; } catch (Exception ex) { _logger.LogError("Connection test failed: {Message}", ex.Message); return false; } } /// /// Disposes of the SQL Server exporter and releases all resources. /// public void Dispose() { Dispose(true); GC.SuppressFinalize(this); } /// /// Implements Dispose pattern for resource cleanup. /// protected virtual void Dispose(bool disposing) { if (!_disposed) { if (disposing) { Disconnect(); } _disposed = true; } } }