Skip to content

Commit

Permalink
BL-204
Browse files Browse the repository at this point in the history
  • Loading branch information
bdw429s committed Jun 8, 2024
1 parent 32978d5 commit d90bf6e
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 7 deletions.
14 changes: 14 additions & 0 deletions src/main/java/ortus/boxlang/runtime/bifs/global/string/ReFind.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,20 @@ public Object _invoke( IBoxContext context, ArgumentsScope arguments ) {
reg_expression = "(?i)" + reg_expression;
}

// Replace POSIX character classes with Java regex equivalents
reg_expression = reg_expression.replace( "[:upper:]", "A-Z" )
.replace( "[:lower:]", "a-z" )
.replace( "[:digit:]", "\\d" )
.replace( "[:xdigit:]", "0-9a-fA-F" )
.replace( "[:alnum:]", "a-zA-Z0-9" )
.replace( "[:alpha:]", "a-zA-Z" )
.replace( "[:blank:]", " \\t" )
.replace( "[:space:]", "\\s" )
.replace( "[:cntrl:]", "\\x00-\\x1F\\x7F" )
.replace( "[:punct:]", "!\"#$%&'()*+,-./:;<=>?@\\[\\]^_`{|}~" )
.replace( "[:graph:]", "\\x21-\\x7E" )
.replace( "[:print:]", "\\x20-\\x7E" );

// Check if the start position is within valid bounds
if ( start < 1 ) {
// CF turns negative start into 1. Ugh, but ok.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,17 +76,80 @@ public Object _invoke( IBoxContext context, ArgumentsScope arguments ) {
regex = "(?i)" + regex;
}

StringBuffer result = new StringBuffer();
Matcher matcher = Pattern.compile( regex ).matcher( string );
// Replace POSIX character classes with Java regex equivalents
regex = regex.replace( "[:upper:]", "A-Z" )
.replace( "[:lower:]", "a-z" )
.replace( "[:digit:]", "\\d" )
.replace( "[:xdigit:]", "0-9a-fA-F" )
.replace( "[:alnum:]", "a-zA-Z0-9" )
.replace( "[:alpha:]", "a-zA-Z" )
.replace( "[:blank:]", " \\t" )
.replace( "[:space:]", "\\s" )
.replace( "[:cntrl:]", "\\x00-\\x1F\\x7F" )
.replace( "[:punct:]", "!\"#$%&'()*+,-./:;<=>?@\\[\\]^_`{|}~" )
.replace( "[:graph:]", "\\x21-\\x7E" )
.replace( "[:print:]", "\\x20-\\x7E" );

StringBuffer result = new StringBuffer();
Matcher matcher = Pattern.compile( regex ).matcher( string );

boolean upperCase = false;
boolean lowerCase = false;

while ( matcher.find() ) {
StringBuffer replacement = new StringBuffer( substring );
for ( int i = 0; i < replacement.length() - 1; i++ ) {
if ( replacement.charAt( i ) == '\\' && Character.isDigit( replacement.charAt( i + 1 ) ) ) {
int groupIndex = Character.getNumericValue( replacement.charAt( i + 1 ) );
String group = matcher.group( groupIndex );
replacement.replace( i, i + 2, group );
i += group.length() - 2;
if ( replacement.charAt( i ) == '\\' ) {
// If the character before the \ is also a \, skip this iteration
if ( i > 0 && replacement.charAt( i - 1 ) == '\\' ) {
continue;
}

if ( replacement.charAt( i + 1 ) == 'U' ) {
upperCase = true;
lowerCase = false;
replacement.delete( i, i + 2 );
i--;
continue;
} else if ( replacement.charAt( i + 1 ) == 'L' ) {
lowerCase = true;
upperCase = false;
replacement.delete( i, i + 2 );
i--;
continue;
} else if ( replacement.charAt( i + 1 ) == 'E' ) {
upperCase = false;
lowerCase = false;
replacement.delete( i, i + 2 );
i--;
continue;
} else if ( Character.isDigit( replacement.charAt( i + 1 ) ) ) {
int groupIndex = Character.getNumericValue( replacement.charAt( i + 1 ) );
String group = matcher.group( groupIndex );

if ( upperCase && group != null ) {
group = group.toUpperCase();
} else if ( lowerCase && group != null ) {
group = group.toLowerCase();
}
// Check if the previous two characters were \\u or \\l
if ( i >= 2 && replacement.charAt( i - 2 ) == '\\' && group != null ) {
if ( replacement.charAt( i - 1 ) == 'u' ) {
// Uppercase the first character of the group
group = Character.toUpperCase( group.charAt( 0 ) ) + group.substring( 1 );
replacement.delete( i - 2, i );
i -= 2;
} else if ( replacement.charAt( i - 1 ) == 'l' ) {
// Lowercase the first character of the group
group = Character.toLowerCase( group.charAt( 0 ) ) + group.substring( 1 );
replacement.delete( i - 2, i );
i -= 2;
}
}

replacement.replace( i, i + 2, group );
i += ( group != null ? group.length() : 0 ) - 2;
}
}
}
matcher.appendReplacement( result, Matcher.quoteReplacement( replacement.toString() ) );
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -115,4 +115,46 @@ public void testReplaceBackReferenceOne() {
assertThat( variables.get( result ) ).isEqualTo( "*abc*_000def999" );
}

/**
 * Exercises the range-style case markers (backslash-U, backslash-L, backslash-E) in the
 * replacement argument of REReplace, together with a POSIX {@code [[:upper:]]} class in
 * the pattern.
 */
@Test
public void testCaseStartStop() {
    // A lowercase marker applied to the single captured group; the expected result
    // shows the "\scream" text passing through unchanged.
    String lowerCaseSource = """
        result = REReplace("HELLO", "([[:upper:]]*)", "Don't shout\\scream \\L\\1");
        """;
    String lowerCaseExpected = "Don't shout\\scream hello";

    instance.executeSource( lowerCaseSource, context );
    assertThat( variables.get( result ) ).isEqualTo( lowerCaseExpected );

    // Upper-case the first group, lower-case the second, and leave the third as captured.
    String mixedCaseSource = """
        result = REReplace("first@SECOND@THIRD", "(.*)@(.*)@(.*)", "\\U\\1\\E@\\L\\2\\E@\\3");
        """;
    String mixedCaseExpected = "FIRST@second@THIRD";

    instance.executeSource( mixedCaseSource, context );
    assertThat( variables.get( result ) ).isEqualTo( mixedCaseExpected );
}

/**
 * Exercises the single-character case markers (lowercase "u" and "l" after a backslash)
 * in the replacement argument of REReplace / reReplace, and contrasts them with the
 * range-style uppercase marker.
 */
@Test
public void testCaseSwapOneChar() {
    // Only the first character of each marked group changes case.
    String firstCharSource = """
        result = REReplace("first@SECOND@THIRD", "(.*)@(.*)@(.*)", "\\u\\1@\\l\\2@\\3");
        """;
    String firstCharExpected = "First@sECOND@THIRD";

    instance.executeSource( firstCharSource, context );
    assertThat( variables.get( result ) ).isEqualTo( firstCharExpected );

    // Member-function form: capitalise just the leading letter.
    String capitaliseSource = """
        result = "zachary".reReplace("^(.)(.*)$", "\\u\\1\\2");
        """;
    String capitaliseExpected = "Zachary";

    instance.executeSource( capitaliseSource, context );
    assertThat( variables.get( result ) ).isEqualTo( capitaliseExpected );

    // Range-style uppercase marker with no terminator: both groups come out upper-cased.
    String upperAllSource = """
        result = "zachary".reReplace("^(.)(.*)$", "\\U\\1\\2");
        """;
    String upperAllExpected = "ZACHARY";

    instance.executeSource( upperAllSource, context );
    assertThat( variables.get( result ) ).isEqualTo( upperAllExpected );
}

}

0 comments on commit d90bf6e

Please sign in to comment.