Update 2023-12-04: See fastest ReadOnlySpan<T> version at the bottom of the post
Console Input
using System;
using System.Collections.Generic;
using System.Threading.Tasks;
using System.Text;
using System.IO;
using System.Diagnostics;
namespace Domainizr.ConsoleApp
{
public class Program
{
/// <summary>
/// Entry point: streams standard input through <c>ReadGroups</c>, printing a
/// progress dot for every hundredth group, then reports the total number of
/// groups and the elapsed wall-clock time in milliseconds.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static async Task Main(string[] args)
{
    Console.WriteLine("Starting");
    var timer = Stopwatch.StartNew();

    const int ChunkSize = 1024 * 1024;
    var chunk = new char[ChunkSize];
    var total = 0;

    // A file stream (e.g. File.OpenRead("MOCK_DATA.json")) can be substituted
    // here for the standard input stream when testing against a fixed data set.
    var input = Console.OpenStandardInput(ChunkSize);
    var reader = new StreamReader(input, Encoding.UTF8);
    Console.SetIn(reader); // This will allow input >256 chars

    Console.WriteLine("Reading groups");
    await foreach (var currentGroup in ReadGroups(reader, chunk))
    {
        // The dot is emitted before the counter advances, so the very first
        // group (index 0) also produces one.
        if (total % 100 == 0)
        {
            Console.Write(".");
        }
        total++;
    }

    Console.WriteLine($"\r\nCount: {total}");
    Console.WriteLine($"Elapsed: {timer.ElapsedMilliseconds}ms");
}
/// <summary>
/// Encodes <paramref name="plainText"/> as UTF-8 and renders the resulting
/// bytes as upper-case hexadecimal pairs joined by hyphens (the format
/// produced by <see cref="BitConverter.ToString(byte[])"/>).
/// </summary>
/// <param name="plainText">Text to render.</param>
/// <returns>Hyphen-separated hex pairs; an empty string for empty input.</returns>
public static string HexString(string plainText)
    => BitConverter.ToString(Encoding.UTF8.GetBytes(plainText));
/// <summary>
/// Reads <paramref name="stream"/> chunk by chunk and yields each piece of
/// text that is terminated by a line feed ("\n"), without the line feed.
/// Text after the final separator is yielded last if it is not empty.
/// </summary>
/// <param name="stream">Reader that supplies the characters.</param>
/// <param name="buffer">Scratch buffer that receives each read; its length is the chunk size.</param>
/// <returns>An asynchronous sequence of the separated groups, in input order.</returns>
public static async IAsyncEnumerable<string> ReadGroups(StreamReader stream, char[] buffer)
{
    var separator = "\n";
    Console.WriteLine($"Separator: {HexString(separator)}");

    // Text read so far that has not yet been closed by a separator.
    var lastPart = "";
    while (true)
    {
        var size = await stream.ReadAsync(buffer, 0, buffer.Length);
        if (size <= 0) break;

        // Prepend the unfinished text so a group that spans several reads is
        // seen as a whole. Build the string straight from the buffer range
        // rather than copying the range into a temporary array first.
        var s = lastPart + new string(buffer, 0, size);

        // The carried text now lives inside s. Reset it so the unfinished
        // remainder reported below replaces it instead of being appended to
        // it, which would duplicate the carried text whenever a read
        // contains no separator at all.
        lastPart = "";

        foreach (var part in SplitString(s, separator))
        {
            if (part.complete)
            {
                yield return part.value;
            }
            else
            {
                lastPart = part.value;
            }
        }
    }

    if (!string.IsNullOrEmpty(lastPart))
        yield return lastPart;
}
/// <summary>
/// Splits <paramref name="source"/> at every occurrence of
/// <paramref name="separator"/>. Each piece that is followed by a separator
/// is reported as complete; a non-empty remainder after the last separator
/// is reported as incomplete. The separator itself is never included.
/// </summary>
/// <param name="source">Text to split.</param>
/// <param name="separator">Non-empty separator, matched ordinally (char by char).</param>
/// <returns>
/// The pieces in order. At most one incomplete piece is produced, and it is
/// always the last element.
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="separator"/> is null or empty. Because this is an
/// iterator, the exception surfaces when enumeration starts.
/// </exception>
public static IEnumerable<(bool complete, string value)> SplitString(string source, string separator)
{
    // An empty separator matches at every position without advancing, which
    // would make the loop below yield empty pieces forever.
    if (string.IsNullOrEmpty(separator))
        throw new ArgumentException("Separator must not be null or empty.", nameof(separator));

    if (source.Length <= 0) yield break;

    var start = 0;
    var sepLen = separator.Length;
    while (true)
    {
        // Ordinal search: the culture-sensitive default can fail to find
        // "\n" inside "\r\n" on ICU-based runtimes (.NET 5+).
        var end = source.IndexOf(separator, start, StringComparison.Ordinal);
        if (end < 0) break;
        yield return (true, source.Substring(start, end - start));
        start = end + sepLen;
    }

    if (start < source.Length)
    {
        yield return (false, source.Substring(start));
    }
}
}
}
Usage: Run application and type: 123#45#67<ENTER>89#<ENTER>
Update 2023-11-28: Refactored
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp
{
class Program
{
/// <summary>
/// Entry point: reads the file named by the first command-line argument,
/// splits it into line-feed-separated groups via <c>ReadGroups</c>, echoes
/// the first ten groups, prints a progress dot every thousandth group and
/// finally reports the average group length, the group count and the
/// elapsed time. Any exception is printed rather than propagated.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the input file path.</param>
public static async Task Main(string[] args)
{
    if (args.Length == 0)
    {
        Console.WriteLine("Please provide an input file path as a command-line argument.");
        return;
    }

    string filePath = args[0];
    Console.WriteLine("Starting");
    var sw = Stopwatch.StartNew();
    var bufferSize = 1024 * 32;
    var buffer = new char[bufferSize];
    var groupCount = 0;
    try
    {
        // The reader is used directly and disposed together with the file.
        // Console.In is deliberately left untouched so it never ends up
        // bound to a file that is closed when this block exits.
        using (var stream = File.OpenRead(filePath))
        using (var reader = new StreamReader(stream, Encoding.UTF8))
        {
            Console.WriteLine("Reading groups");
            double groupSizes = 0;
            await foreach (var group in ReadGroups(reader, buffer, "\n"))
            {
                if (groupCount < 10)
                {
                    Console.WriteLine($"group: {group}");
                }
                if (groupCount % 1000 == 0)
                {
                    Console.Write(".");
                }
                groupCount++;
                groupSizes += group.Length;
            }

            // Avoid printing NaN when the input produced no groups at all.
            var averageSize = groupCount > 0 ? groupSizes / groupCount : 0;
            Console.WriteLine($"\nAverage size: {averageSize}");
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"\r\nError: {ex}");
    }

    Console.WriteLine($"\r\nCount: {groupCount}");
    Console.WriteLine($"Elapsed: {FormatElapsedTime(sw.Elapsed)}");
}
/// <summary>
/// Renders a duration as "{hours}h {minutes}m {seconds}s {milliseconds}ms".
/// Hours are the truncated total hours (so they may exceed 23); the other
/// three fields are the corresponding <see cref="TimeSpan"/> components.
/// </summary>
/// <param name="elapsed">Duration to format.</param>
/// <returns>The human-readable representation.</returns>
public static string FormatElapsedTime(TimeSpan elapsed)
{
    var wholeHours = (int)elapsed.TotalHours;
    var minutes = elapsed.Minutes;
    var seconds = elapsed.Seconds;
    var milliseconds = elapsed.Milliseconds;
    return $"{wholeHours}h {minutes}m {seconds}s {milliseconds}ms";
}
/// <summary>
/// Reads <paramref name="stream"/> chunk by chunk and yields every piece of
/// text that is terminated by <paramref name="separator"/>, without the
/// separator. A non-empty remainder after the last separator is yielded at
/// the end. Pieces of a group that spans several reads are collected in a
/// list and concatenated once the group is complete.
/// </summary>
/// <param name="stream">Reader that supplies the characters.</param>
/// <param name="buffer">Scratch buffer that receives each read; its length is the chunk size.</param>
/// <param name="separator">Non-empty separator, matched ordinally (char by char).</param>
/// <returns>An asynchronous sequence of the separated groups, in input order.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="separator"/> is null or empty. Because this is an
/// iterator, the exception surfaces when enumeration starts.
/// </exception>
public static async IAsyncEnumerable<string> ReadGroups(StreamReader stream, char[] buffer, string separator)
{
    // An empty separator matches everywhere without advancing and would
    // make the inner loop yield empty groups forever.
    if (string.IsNullOrEmpty(separator))
        throw new ArgumentException("Separator must not be null or empty.", nameof(separator));

    var openParts = new List<string>();

    // Up to (separator.Length - 1) trailing characters of a chunk might be
    // the beginning of a separator that is completed by the next read. They
    // are held back here and re-scanned together with the next chunk. For a
    // single-character separator this is always empty.
    var carry = "";
    var holdBack = separator.Length - 1;

    while (true)
    {
        var size = await stream.ReadAsync(buffer, 0, buffer.Length);
        if (size <= 0) break;

        var s = carry + new string(buffer, 0, size);
        var position = 0;
        while (true)
        {
            // Ordinal search: the culture-sensitive default can fail to find
            // "\n" inside "\r\n" on ICU-based runtimes (.NET 5+).
            var index = s.IndexOf(separator, position, StringComparison.Ordinal);
            if (index < 0) break;
            openParts.Add(s.Substring(position, index - position));
            yield return string.Concat(openParts);
            openParts.Clear();
            position = index + separator.Length;
        }

        // Commit everything after the last separator except the held-back tail.
        var keep = Math.Min(holdBack, s.Length - position);
        openParts.Add(s.Substring(position, s.Length - position - keep));
        carry = s.Substring(s.Length - keep);
    }

    // End of input: the held-back tail can no longer become a separator.
    openParts.Add(carry);
    var last = string.Concat(openParts);

    // Skip the empty remainder left by input that ends with a separator (or
    // is empty), matching the behavior of ReadGroups2.
    if (last.Length > 0)
        yield return last;
}
/// <summary>
/// Variant of <c>ReadGroups</c> that accumulates the current group in a
/// <see cref="StringBuilder"/>: reads <paramref name="stream"/> chunk by
/// chunk and yields every piece of text terminated by
/// <paramref name="separator"/>, without the separator. A non-empty
/// remainder after the last separator is yielded at the end.
/// </summary>
/// <param name="stream">Reader that supplies the characters.</param>
/// <param name="buffer">Scratch buffer that receives each read; its length is the chunk size.</param>
/// <param name="separator">Non-empty separator, matched ordinally (char by char).</param>
/// <returns>An asynchronous sequence of the separated groups, in input order.</returns>
/// <exception cref="ArgumentException">
/// <paramref name="separator"/> is null or empty. Because this is an
/// iterator, the exception surfaces when enumeration starts.
/// </exception>
public static async IAsyncEnumerable<string> ReadGroups2(StreamReader stream, char[] buffer, string separator)
{
    // An empty separator matches everywhere without advancing and would
    // make the inner loop yield empty groups forever.
    if (string.IsNullOrEmpty(separator))
        throw new ArgumentException("Separator must not be null or empty.", nameof(separator));

    var stringBuilder = new StringBuilder();

    // Up to (separator.Length - 1) trailing characters of a chunk might be
    // the beginning of a separator that is completed by the next read. They
    // are held back here and re-scanned together with the next chunk. For a
    // single-character separator this is always empty.
    var carry = "";
    var holdBack = separator.Length - 1;

    while (true)
    {
        var size = await stream.ReadAsync(buffer, 0, buffer.Length);
        if (size <= 0) break;

        var s = carry + new string(buffer, 0, size);
        var position = 0;
        while (true)
        {
            // Ordinal search: the culture-sensitive default can fail to find
            // "\n" inside "\r\n" on ICU-based runtimes (.NET 5+).
            var index = s.IndexOf(separator, position, StringComparison.Ordinal);
            if (index < 0) break;
            stringBuilder.Append(s, position, index - position);
            yield return stringBuilder.ToString();
            stringBuilder.Clear();
            position = index + separator.Length;
        }

        // Commit everything after the last separator except the held-back tail.
        var keep = Math.Min(holdBack, s.Length - position);
        stringBuilder.Append(s, position, s.Length - position - keep);
        carry = s.Substring(s.Length - keep);
    }

    // End of input: the held-back tail can no longer become a separator.
    stringBuilder.Append(carry);
    if (stringBuilder.Length > 0)
        yield return stringBuilder.ToString();
}
}
}
Update 2023-12-04: Refactored with ReadOnlySpan<T>
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Text;
using System.Threading.Tasks;
namespace ConsoleApp
{
class Program
{
/// <summary>
/// Entry point: reads the file named by the first command-line argument,
/// splits it into line-feed-separated groups via <c>ReadGroups</c>, echoes
/// the first ten groups, prints a progress dot every thousandth group and
/// finally reports the average group length, the group count and the
/// elapsed time. Any exception is printed rather than propagated.
/// </summary>
/// <param name="args">Command-line arguments; <c>args[0]</c> is the input file path.</param>
public static async Task Main(string[] args)
{
    if (args.Length == 0)
    {
        Console.WriteLine("Please provide an input file path as a command-line argument.");
        return;
    }

    string filePath = args[0];
    Console.WriteLine("Starting");
    var sw = Stopwatch.StartNew();
    var bufferSize = 1024;
    var buffer = new char[bufferSize];
    var groupCount = 0;
    try
    {
        // The reader is used directly and disposed together with the file.
        // Console.In is deliberately left untouched so it never ends up
        // bound to a file that is closed when this block exits.
        using (var stream = File.OpenRead(filePath))
        using (var reader = new StreamReader(stream, Encoding.UTF8))
        {
            Console.WriteLine("Reading groups");
            double groupSizes = 0;
            await foreach (var group in ReadGroups(reader, buffer, "\n"))
            {
                if (groupCount < 10)
                {
                    Console.WriteLine($"group: {group}");
                }
                if (groupCount % 1000 == 0)
                {
                    Console.Write(".");
                }
                groupCount++;
                groupSizes += group.Length;
            }

            // An empty input file produces no groups; avoid printing NaN.
            var averageSize = groupCount > 0 ? groupSizes / groupCount : 0;
            Console.WriteLine($"\nAverage size: {averageSize}");
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"\r\nError: {ex}");
    }

    Console.WriteLine($"\r\nCount: {groupCount}");
    Console.WriteLine($"Elapsed: {FormatElapsedTime(sw.Elapsed)}");
}
/// <summary>
/// Formats a duration as "{hours}h {minutes}m {seconds}s {milliseconds}ms",
/// where hours is the total number of whole hours (days are folded in) and
/// the remaining fields are the matching <see cref="TimeSpan"/> components.
/// </summary>
/// <param name="elapsed">Duration to format.</param>
/// <returns>The human-readable representation.</returns>
public static string FormatElapsedTime(TimeSpan elapsed)
{
    var totalWholeHours = (int)elapsed.TotalHours;
    var text = $"{totalWholeHours}h {elapsed.Minutes}m {elapsed.Seconds}s {elapsed.Milliseconds}ms";
    return text;
}
/// <summary>
/// Asynchronous front end of the span-based splitter: fills
/// <paramref name="buffer"/> from <paramref name="stream"/> repeatedly, lets
/// <c>ReadGroupsSync</c> extract the groups completed by each chunk, and
/// yields them in order. Text left in the builder after the last read is
/// yielded as the final group when it is not empty.
/// </summary>
/// <param name="stream">Reader that supplies the characters.</param>
/// <param name="buffer">Scratch buffer that receives each read; its length is the chunk size.</param>
/// <param name="separator">Separator passed through to <c>ReadGroupsSync</c>.</param>
/// <returns>An asynchronous sequence of the separated groups, in input order.</returns>
public static async IAsyncEnumerable<string> ReadGroups(StreamReader stream, char[] buffer, string separator)
{
    // Holds the text of the group that is still open between reads. Span
    // work is delegated to the synchronous helper.
    var pending = new StringBuilder();

    int charsRead;
    while ((charsRead = await stream.ReadAsync(buffer, 0, buffer.Length)) > 0)
    {
        var completed = ReadGroupsSync(pending, buffer, separator, charsRead);
        for (var i = 0; i < completed.Count; i++)
        {
            yield return completed[i];
        }
    }

    if (pending.Length == 0)
    {
        yield break;
    }

    yield return pending.ToString();
}
/// <summary>
/// Processes one chunk (<c>buffer[0..size]</c>): every group that is
/// completed by a separator in this chunk is returned, and the text after
/// the last separator is left in <paramref name="stringBuilder"/> so the
/// next call can continue the open group.
/// </summary>
/// <param name="stringBuilder">
/// Text of the group still open from earlier chunks; updated in place.
/// </param>
/// <param name="buffer">Buffer holding the current chunk.</param>
/// <param name="separator">Non-empty separator, matched ordinally (char by char).</param>
/// <param name="size">Number of valid characters at the start of <paramref name="buffer"/>.</param>
/// <returns>The groups completed by this chunk, in order (possibly empty).</returns>
/// <exception cref="ArgumentException"><paramref name="separator"/> is null or empty.</exception>
private static List<string> ReadGroupsSync(StringBuilder stringBuilder, char[] buffer, string separator, int size)
{
    // An empty separator matches at index 0 forever and would grow the
    // result list without bound.
    if (string.IsNullOrEmpty(separator))
        throw new ArgumentException("Separator must not be null or empty.", nameof(separator));

    var list = new List<string>();
    ReadOnlySpan<char> sep = separator.AsSpan();
    ReadOnlySpan<char> s = buffer.AsSpan(0, size);

    // A multi-character separator may straddle the chunk boundary: its first
    // `overlap` characters sit at the end of the pending text and the rest
    // at the start of this chunk. Try the longest overlap first because it
    // corresponds to the earliest-starting match.
    var maxOverlap = Math.Min(sep.Length - 1, stringBuilder.Length);
    for (var overlap = maxOverlap; overlap >= 1; overlap--)
    {
        var rest = sep.Length - overlap;
        if (rest > s.Length || !s.Slice(0, rest).SequenceEqual(sep.Slice(overlap)))
            continue;

        var tailStart = stringBuilder.Length - overlap;
        var tailMatches = true;
        for (var i = 0; i < overlap; i++)
        {
            if (stringBuilder[tailStart + i] != sep[i])
            {
                tailMatches = false;
                break;
            }
        }
        if (!tailMatches)
            continue;

        // Drop the separator prefix from the pending text and close the group.
        stringBuilder.Length = tailStart;
        list.Add(stringBuilder.ToString());
        stringBuilder.Clear();
        s = s.Slice(rest);
        break;
    }

    // Separators that lie entirely inside this chunk.
    while (true)
    {
        var index = s.IndexOf(sep);
        if (index < 0) break;
        stringBuilder.Append(s.Slice(0, index));
        list.Add(stringBuilder.ToString());
        stringBuilder.Clear();
        s = s.Slice(index + sep.Length);
    }

    // Whatever follows the last separator stays open for the next chunk.
    stringBuilder.Append(s);
    return list;
}
}
}
C#: Read separator-delimited groups of strings (streaming) from STDIN