Significantly improved error detection and handling for excel column parsing

Significantly improved error detection and handling for excel column parsing
Added support for default values when a column's text length is 0
All exceptions are propagated, logged, and handled by the parser
This commit is contained in:
Collin Smith
2020-12-12 14:53:39 -08:00
parent d0f06c6c06
commit db1b60fb7d
4 changed files with 224 additions and 97 deletions

View File

@ -11,6 +11,7 @@ import java.util.Arrays;
import java.util.Iterator;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.commons.lang3.tuple.Triple;
@ -218,10 +219,10 @@ public abstract class Excel<
final Class primaryKeyType = indexed ? null : primaryKey.getType();
for (int i = excel.offset(); parser.cacheLine() != -1; i++) {
E entry = excel.newEntry();
String name = indexed ? null : parser.parseString(primaryKeyColumnId);
String name = indexed ? null : parser.parseString(primaryKeyColumnId, "");
try {
MDC.put("entry", name);
inject(excel, entry, name, columns, parser);
MDC.put("entry", indexed || StringUtils.isBlank(name) ? "" + i : name);
parseColumns(excel, entry, name, columns, parser);
} finally {
MDC.remove("entry");
}
@ -231,8 +232,24 @@ public abstract class Excel<
return excel;
}
/**
 * Wraps a failure raised while parsing a single column token in a {@code ParseException}
 * and logs it at warn level; nothing is rethrown from here.
 *
 * @param t          the failure that was caught
 * @param field      the field that was being populated
 * @param type       declared type of {@code field}; for array types the component type is reported
 * @param key        row identifier included in the message
 * @param columnName name of the column whose token failed to parse
 * @param token      raw token text that could not be parsed
 */
static void
catchParseException(
    Throwable t,
    Field field,
    Class type,
    String key,
    String columnName,
    CharSequence token
) {
  final String tokenText = token.toString();
  final Class reportedType = type.isArray() ? type.getComponentType() : type;
  final String reportedTypeName = reportedType.getCanonicalName();
  final ParseException wrapped = new ParseException(
      t,
      field,
      "error parsing field %s row: '%s' column: '%s': '%s' as %s",
      field, key, columnName, tokenText, reportedTypeName);
  log.warn(wrapped.getMessage(), wrapped);
}
static <E extends Entry, S extends Serializer<E>, T extends Excel<E, S>>
void inject(
void parseColumns(
T excel,
E entry,
String key,
@ -244,42 +261,155 @@ public abstract class Excel<
for (Triple<Field, int[], String[]> column : columns) {
final Field field = column.getLeft();
final int[] columnIds = column.getMiddle();
final int numColumns = columnIds.length;
final String[] columnNames = column.getRight();
final Class type = field.getType();
try {
if (type == String.class) {
field.set(entry, parser.parseString(columnIds[0]));
try {
field.set(entry, parser.parseString(columnIds[0], ""));
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[0], parser.token(columnIds[0]));
}
} else if (type == String[].class) {
field.set(entry, parser.parseString(columnIds));
} else if (type == byte.class) {
field.setByte(entry, parser.parseByte(columnIds[0]));
final String[] value = new String[numColumns];
for (int i = 0; i < numColumns; i++) {
try {
value[i] = parser.parseString(columnIds[i], "");
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[i], parser.token(columnIds[i]));
}
}
field.set(entry, value);
}
else if (type == byte.class) {
try {
field.setByte(entry, parser.parseByte(columnIds[0], (byte) 0));
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[0], parser.token(columnIds[0]));
}
} else if (type == byte[].class) {
field.set(entry, parser.parseByte(columnIds));
} else if (type == short.class) {
field.setShort(entry, parser.parseShort(columnIds[0]));
final byte[] value = new byte[numColumns];
for (int i = 0; i < numColumns; i++) {
try {
value[i] = parser.parseByte(columnIds[i], (byte) 0);
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[i], parser.token(columnIds[i]));
}
}
field.set(entry, value);
}
else if (type == short.class) {
try {
field.setShort(entry, parser.parseShort(columnIds[0], (short) 0));
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[0], parser.token(columnIds[0]));
}
} else if (type == short[].class) {
field.set(entry, parser.parseShort(columnIds));
} else if (type == int.class) {
field.setInt(entry, parser.parseInt(columnIds[0]));
final short[] value = new short[numColumns];
for (int i = 0; i < numColumns; i++) {
try {
value[i] = parser.parseShort(columnIds[i], (short) 0);
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[i], parser.token(columnIds[i]));
}
}
field.set(entry, value);
}
else if (type == int.class) {
try {
field.setInt(entry, parser.parseInt(columnIds[0], 0));
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[0], parser.token(columnIds[0]));
}
} else if (type == int[].class) {
field.set(entry, parser.parseInt(columnIds));
} else if (type == long.class) {
field.setLong(entry, parser.parseLong(columnIds[0]));
final int[] value = new int[numColumns];
for (int i = 0; i < numColumns; i++) {
try {
value[i] = parser.parseInt(columnIds[i], 0);
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[i], parser.token(columnIds[i]));
}
}
field.set(entry, value);
}
else if (type == long.class) {
try {
field.setLong(entry, parser.parseLong(columnIds[0], 0L));
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[0], parser.token(columnIds[0]));
}
} else if (type == long[].class) {
field.set(entry, parser.parseLong(columnIds));
} else if (type == boolean.class) {
field.setBoolean(entry, parser.parseBoolean(columnIds[0]));
final long[] value = new long[numColumns];
for (int i = 0; i < numColumns; i++) {
try {
value[i] = parser.parseLong(columnIds[i], 0L);
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[i], parser.token(columnIds[i]));
}
}
field.set(entry, value);
}
else if (type == boolean.class) {
try {
field.setBoolean(entry, parser.parseBoolean(columnIds[0], false));
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[0], parser.token(columnIds[0]));
}
} else if (type == boolean[].class) {
field.set(entry, parser.parseBoolean(columnIds));
} else if (type == float.class) {
field.setFloat(entry, parser.parseFloat(columnIds[0]));
final boolean[] value = new boolean[numColumns];
for (int i = 0; i < numColumns; i++) {
try {
value[i] = parser.parseBoolean(columnIds[i], false);
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[i], parser.token(columnIds[i]));
}
}
field.set(entry, value);
}
else if (type == float.class) {
try {
field.setFloat(entry, parser.parseFloat(columnIds[0], 0f));
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[0], parser.token(columnIds[0]));
}
} else if (type == float[].class) {
field.set(entry, parser.parseFloat(columnIds));
} else if (type == double.class) {
field.setDouble(entry, parser.parseDouble(columnIds[0]));
final float[] value = new float[numColumns];
for (int i = 0; i < numColumns; i++) {
try {
value[i] = parser.parseFloat(columnIds[i], 0f);
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[i], parser.token(columnIds[i]));
}
}
field.set(entry, value);
}
else if (type == double.class) {
try {
field.setDouble(entry, parser.parseDouble(columnIds[0], 0d));
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[0], parser.token(columnIds[0]));
}
} else if (type == double[].class) {
field.set(entry, parser.parseDouble(columnIds));
} else {
final double[] value = new double[numColumns];
for (int i = 0; i < numColumns; i++) {
try {
value[i] = parser.parseDouble(columnIds[i], 0d);
} catch (Throwable t) {
catchParseException(t, field, type, key, columnNames[i], parser.token(columnIds[i]));
}
}
field.set(entry, value);
}
else {
throw new ParseException(field, "Cannot parse fields of type %s",
org.apache.commons.lang3.ClassUtils.getCanonicalName(type));
}

View File

@ -35,6 +35,29 @@ public class ParseException extends Exception {
setStackTrace(stackTrace.toArray());
}
/**
 * Creates a parse exception caused by {@code t} whose stack trace starts with a synthetic
 * element naming {@code field}, followed by the original trace.
 *
 * @param t      the underlying cause, attached via {@code initCause}
 * @param field  field whose declaring class and name form the leading stack trace element
 * @param format message format, forwarded together with {@code args} to the
 *               {@code (format, args)} constructor
 * @param args   arguments for {@code format}
 */
ParseException(Throwable t, Field field, String format, Object... args) {
  this(format, args);
  initCause(t);

  // The leading stack trace element is formatted like:
  //   at com.riiablo.excel.txt.MonStats$Entry.hcIdx2(MonStats.java:0)
  final Class owner = field.getDeclaringClass();
  final String ownerName = owner.getName();
  final String fieldName = field.getName();
  final String fileName = getRootClass(owner).getSimpleName() + ".java";
  // 0 indicates line 0 -- non-zero required for link parsing in IDEA
  final StackTraceElement head = new StackTraceElement(ownerName, fieldName, fileName, 0);

  // Prepend the synthetic element to the existing trace.
  final StackTraceElement[] tail = getStackTrace();
  final StackTraceElement[] combined = new StackTraceElement[tail.length + 1];
  combined[0] = head;
  System.arraycopy(tail, 0, combined, 1, tail.length);
  setStackTrace(combined);
}
ParseException(Class clazz, String format, Object... args) {
this(format, args);

View File

@ -167,11 +167,11 @@ lineBuilder:
}
public String columnName(int i) {
return columnNames.get(i).toString();
return columnNames.get(i);
}
public String rowName() {
return parseString(0);
return parseString(0, "");
}
public int columnId(String columnName) {
@ -201,35 +201,49 @@ lineBuilder:
return line.subSequence(tokenOffsets[i], tokenOffsets[i + 1]);
}
public byte parseByte(int i) {
public byte parseByte(int i, byte defaultValue) {
final int[] tokenOffsets = tokenOffsetsCache;
final int intValue = line.parseInt(tokenOffsets[i], tokenOffsets[i + 1]);
final int startOffset = tokenOffsets[i];
final int endOffset = tokenOffsets[i + 1];
if (startOffset >= endOffset) return defaultValue;
final int intValue = line.parseInt(startOffset, endOffset);
final byte result = (byte) intValue;
if (result != intValue) {
throw new NumberFormatException(line.subSequence(tokenOffsets[i], tokenOffsets[i + 1], false).toString());
throw new NumberFormatException(line.subSequence(startOffset, endOffset, false).toString());
}
return result;
}
public short parseShort(int i) {
public short parseShort(int i, short defaultValue) {
final int[] tokenOffsets = tokenOffsetsCache;
return line.parseShort(tokenOffsets[i], tokenOffsets[i + 1]);
final int startOffset = tokenOffsets[i];
final int endOffset = tokenOffsets[i + 1];
if (startOffset >= endOffset) return defaultValue;
return line.parseShort(startOffset, endOffset);
}
public int parseInt(int i) {
public int parseInt(int i, int defaultValue) {
final int[] tokenOffsets = tokenOffsetsCache;
return line.parseInt(tokenOffsets[i], tokenOffsets[i + 1]);
final int startOffset = tokenOffsets[i];
final int endOffset = tokenOffsets[i + 1];
if (startOffset >= endOffset) return defaultValue;
return line.parseInt(startOffset, endOffset);
}
public long parseLong(int i) {
public long parseLong(int i, long defaultValue) {
final int[] tokenOffsets = tokenOffsetsCache;
return line.parseLong(tokenOffsets[i], tokenOffsets[i + 1]);
final int startOffset = tokenOffsets[i];
final int endOffset = tokenOffsets[i + 1];
if (startOffset >= endOffset) return defaultValue;
return line.parseLong(startOffset, endOffset);
}
public boolean parseBoolean(int i) {
public boolean parseBoolean(int i, boolean defaultValue) {
final int[] tokenOffsets = tokenOffsetsCache;
final int intValue = line.parseInt(tokenOffsets[i], tokenOffsets[i + 1]);
final int startOffset = tokenOffsets[i];
final int endOffset = tokenOffsets[i + 1];
if (startOffset >= endOffset) return defaultValue;
final int intValue = line.parseInt(startOffset, endOffset);
if ((intValue & 1) != intValue) {
log.warn("boolean exceeds boolean radix at {}:{} (\"{}\", \"{}\"): {}",
index, i, rowName(), columnName(i), intValue);
@ -238,67 +252,27 @@ lineBuilder:
return intValue != 0;
}
public float parseFloat(int i) {
public float parseFloat(int i, float defaultValue) {
final int[] tokenOffsets = tokenOffsetsCache;
return line.parseFloat(tokenOffsets[i], tokenOffsets[i + 1]);
final int startOffset = tokenOffsets[i];
final int endOffset = tokenOffsets[i + 1];
if (startOffset >= endOffset) return defaultValue;
return line.parseFloat(startOffset, endOffset);
}
public double parseDouble(int i) {
public double parseDouble(int i, double defaultValue) {
final int[] tokenOffsets = tokenOffsetsCache;
return line.parseDouble(tokenOffsets[i], tokenOffsets[i + 1]);
final int startOffset = tokenOffsets[i];
final int endOffset = tokenOffsets[i + 1];
if (startOffset >= endOffset) return defaultValue;
return line.parseDouble(startOffset, endOffset);
}
public String parseString(int i) {
public String parseString(int i, String defaultValue) {
final int[] tokenOffsets = tokenOffsetsCache;
final int startOffset = tokenOffsets[i];
final int endOffset = tokenOffsets[i + 1];
if (startOffset >= endOffset) return defaultValue;
return line.toString(tokenOffsets[i], tokenOffsets[i + 1]);
}
/**
 * Parses each of the given columns with {@code parseByte(int)}.
 *
 * @param columns column indexes to parse
 * @return a new array holding one parsed value per entry of {@code columns}, in the same order
 */
public byte[] parseByte(int[] columns) {
  final byte[] parsed = new byte[columns.length];
  int slot = 0;
  for (int column : columns) {
    parsed[slot++] = parseByte(column);
  }
  return parsed;
}
/**
 * Parses each of the given columns with {@code parseShort(int)}.
 *
 * @param columns column indexes to parse
 * @return a new array holding one parsed value per entry of {@code columns}, in the same order
 */
public short[] parseShort(int[] columns) {
  final short[] parsed = new short[columns.length];
  int slot = 0;
  for (int column : columns) {
    parsed[slot++] = parseShort(column);
  }
  return parsed;
}
/**
 * Parses each of the given columns with {@code parseLong(int)}.
 *
 * @param columns column indexes to parse
 * @return a new array holding one parsed value per entry of {@code columns}, in the same order
 */
public long[] parseLong(int[] columns) {
  final long[] parsed = new long[columns.length];
  int slot = 0;
  for (int column : columns) {
    parsed[slot++] = parseLong(column);
  }
  return parsed;
}
/**
 * Parses each of the given columns with {@code parseBoolean(int)}.
 *
 * @param columns column indexes to parse
 * @return a new array holding one parsed value per entry of {@code columns}, in the same order
 */
public boolean[] parseBoolean(int[] columns) {
  final boolean[] parsed = new boolean[columns.length];
  int slot = 0;
  for (int column : columns) {
    parsed[slot++] = parseBoolean(column);
  }
  return parsed;
}
/**
 * Parses each of the given columns with {@code parseFloat(int)}.
 *
 * @param columns column indexes to parse
 * @return a new array holding one parsed value per entry of {@code columns}, in the same order
 */
public float[] parseFloat(int[] columns) {
  final float[] parsed = new float[columns.length];
  int slot = 0;
  for (int column : columns) {
    parsed[slot++] = parseFloat(column);
  }
  return parsed;
}
/**
 * Parses each of the given columns with {@code parseDouble(int)}.
 *
 * @param columns column indexes to parse
 * @return a new array holding one parsed value per entry of {@code columns}, in the same order
 */
public double[] parseDouble(int[] columns) {
  final double[] parsed = new double[columns.length];
  int slot = 0;
  for (int column : columns) {
    parsed[slot++] = parseDouble(column);
  }
  return parsed;
}
/**
 * Parses each of the given columns with {@code parseString(int)}.
 *
 * @param columns column indexes to parse
 * @return a new array holding one parsed value per entry of {@code columns}, in the same order
 */
public String[] parseString(int[] columns) {
  final String[] parsed = new String[columns.length];
  int slot = 0;
  for (int column : columns) {
    parsed[slot++] = parseString(column);
  }
  return parsed;
}
}

View File

@ -46,7 +46,7 @@ public class TxtParserTest extends RiiabloTest {
FileHandle handle = Gdx.files.internal("test/monstats.txt");
TxtParser parser = TxtParser.parse(handle.read());
parser.cacheLine();
Assert.assertEquals(0, parser.parseInt(1));
Assert.assertEquals(0, parser.parseInt(1, -1));
LogManager.setLevel("com.riiablo.excel2", Level.TRACE);
}
}