Skip to content

Commit

Permalink
Added the ability to parse a string. Added relevant unit tests and fi…
Browse files Browse the repository at this point in the history
…xed atomic writing bug
  • Loading branch information
davedelong committed Oct 2, 2010
1 parent 6866bdf commit 7cba09d
Show file tree
Hide file tree
Showing 13 changed files with 299 additions and 100 deletions.
12 changes: 12 additions & 0 deletions CHCSV.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
//
// CHCSV.h
// CHCSVParser
//
// Created by Dave DeLong on 10/2/10.
// Copyright 2010 Home. All rights reserved.
//

#import "CHCSVParser.h"
#import "CHCSVWriter.h"
#import "NSArray+CHCSVAdditions.h"
#import "NSString+CHCSVAdditions.h"
3 changes: 3 additions & 0 deletions CHCSVParser.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@

- (id) initWithContentsOfCSVFile:(NSString *)aCSVFile encoding:(NSStringEncoding)encoding error:(NSError **)anError;
- (id) initWithContentsOfCSVFile:(NSString *)aCSVFile usedEncoding:(NSStringEncoding *)usedEncoding error:(NSError **)anError;

- (id) initWithCSVString:(NSString *)csvString encoding:(NSStringEncoding)encoding error:(NSError **)anError;

- (void) parse;

@end
Expand Down
63 changes: 44 additions & 19 deletions CHCSVParser.m
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ @interface CHCSVParser ()

@property (retain) NSString * currentChunk;

- (void) discoverTextEncoding;
- (NSStringEncoding) textEncodingForData:(NSData *)chunkToSniff offset:(NSUInteger *)offset;

- (NSString *) nextCharacter;
- (void) runParseLoop;
Expand All @@ -90,6 +90,7 @@ - (id) initWithContentsOfCSVFile:(NSString *)aCSVFile encoding:(NSStringEncoding
csvFileHandle = [[NSFileHandle fileHandleForReadingAtPath:csvFile] retain];
if (csvFileHandle == nil) {
if (anError) {
NSLog(@"error for file: %@", csvFile);
*anError = [NSError errorWithDomain:@"com.davedelong.csv" code:0 userInfo:[NSDictionary dictionaryWithObject:@"Unable to open file for reading" forKey:NSLocalizedDescriptionKey]];
}
[self release];
Expand All @@ -113,7 +114,11 @@ - (id) initWithContentsOfCSVFile:(NSString *)aCSVFile encoding:(NSStringEncoding

- (id) initWithContentsOfCSVFile:(NSString *)aCSVFile usedEncoding:(NSStringEncoding *)usedEncoding error:(NSError **)anError {
if (self = [self initWithContentsOfCSVFile:aCSVFile encoding:NSUTF8StringEncoding error:anError]) {
[self discoverTextEncoding];

NSData * chunk = [csvFileHandle readDataOfLength:CHUNK_SIZE];
NSUInteger seekOffset = 0;
fileEncoding = [self textEncodingForData:chunk offset:&seekOffset];
[csvFileHandle seekToFileOffset:seekOffset];

if (usedEncoding) {
*usedEncoding = fileEncoding;
Expand All @@ -122,6 +127,26 @@ - (id) initWithContentsOfCSVFile:(NSString *)aCSVFile usedEncoding:(NSStringEnco
return self;
}

/**
 * Initializes a parser whose input is an in-memory CSV string instead of a file.
 *
 * @param csvString The CSV text to parse. A copy of it becomes the parser's
 *                  current chunk, with the read index positioned at 0.
 * @param encoding  Stored as the parser's file encoding.
 * @param anError   Accepted for signature parity with the file-based
 *                  initializers; this initializer never writes to it.
 * @return The initialized parser, or nil if the superclass initializer failed.
 */
- (id) initWithCSVString:(NSString *)csvString encoding:(NSStringEncoding)encoding error:(NSError **)anError {
    self = [super init];
    if (self == nil) {
        return nil;
    }

    // There is no backing file or file handle when parsing from a string.
    csvFileHandle = nil;
    csvFile = nil;
    fileEncoding = encoding;

    // The whole string is installed up front as the chunk to read from.
    currentChunk = [csvString copy];
    chunkIndex = 0;

    // Fresh per-field and per-line bookkeeping.
    currentField = [[NSMutableString alloc] init];
    currentLine = 0;

    balancedEscapes = YES;
    balancedQuotes = YES;

    state = CHCSVParserStateInsideFile;

    return self;
}

- (void) dealloc {
[csvFileHandle release];
[csvFile release];
Expand All @@ -132,51 +157,51 @@ - (void) dealloc {
[super dealloc];
}

- (void) discoverTextEncoding {
NSData * chunkToSniff = [csvFileHandle readDataOfLength:CHUNK_SIZE];
- (NSStringEncoding) textEncodingForData:(NSData *)chunkToSniff offset:(NSUInteger *)offset {
NSUInteger length = [chunkToSniff length];
NSUInteger offset = 0;
*offset = 0;
NSStringEncoding encoding = NSUTF8StringEncoding;

if (length > 0) {
UInt8* bytes = (UInt8*)[chunkToSniff bytes];
fileEncoding = CFStringConvertEncodingToNSStringEncoding(CFStringGetSystemEncoding());
encoding = CFStringConvertEncodingToNSStringEncoding(CFStringGetSystemEncoding());
switch (bytes[0]) {
case 0x00:
if (length>3 && bytes[1]==0x00 && bytes[2]==0xFE && bytes[3]==0xFF) {
fileEncoding = NSUTF32BigEndianStringEncoding;
offset = 4;
encoding = NSUTF32BigEndianStringEncoding;
*offset = 4;
}
break;
case 0xEF:
if (length>2 && bytes[1]==0xBB && bytes[2]==0xBF) {
fileEncoding = NSUTF8StringEncoding;
offset = 3;
encoding = NSUTF8StringEncoding;
*offset = 3;
}
break;
case 0xFE:
if (length>1 && bytes[1]==0xFF) {
fileEncoding = NSUTF16BigEndianStringEncoding;
offset = 2;
encoding = NSUTF16BigEndianStringEncoding;
*offset = 2;
}
break;
case 0xFF:
if (length>1 && bytes[1]==0xFE) {
if (length>3 && bytes[2]==0x00 && bytes[3]==0x00) {
fileEncoding = NSUTF32LittleEndianStringEncoding;
offset = 4;
encoding = NSUTF32LittleEndianStringEncoding;
*offset = 4;
} else {
fileEncoding = NSUTF16LittleEndianStringEncoding;
offset = 2;
encoding = NSUTF16LittleEndianStringEncoding;
*offset = 2;
}
}
break;
default:
fileEncoding = NSUTF8StringEncoding; // fall back on UTF8
encoding = NSUTF8StringEncoding; // fall back on UTF8
break;
}
}
[csvFileHandle seekToFileOffset:offset];
return;

return encoding;
}

#pragma mark Parsing methods
Expand Down
45 changes: 39 additions & 6 deletions CHCSVParser.xcodeproj/project.pbxproj
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
objects = {

/* Begin PBXBuildFile section */
5516BCB512578CFC0025F235 /* NSString+CHCSVAdditions.m in Sources */ = {isa = PBXBuildFile; fileRef = 5516BCB412578CFC0025F235 /* NSString+CHCSVAdditions.m */; };
5516BCB912578D750025F235 /* CHCSVSupport.m in Sources */ = {isa = PBXBuildFile; fileRef = 5516BCB812578D750025F235 /* CHCSVSupport.m */; };
5516BCBB12578EA90025F235 /* NSString+CHCSVAdditions.m in Sources */ = {isa = PBXBuildFile; fileRef = 5516BCB412578CFC0025F235 /* NSString+CHCSVAdditions.m */; };
5516BCBC12578EAD0025F235 /* CHCSVSupport.m in Sources */ = {isa = PBXBuildFile; fileRef = 5516BCB812578D750025F235 /* CHCSVSupport.m */; };
551981D61203715400FBE033 /* CHCSVParser.m in Sources */ = {isa = PBXBuildFile; fileRef = 551981D51203715400FBE033 /* CHCSVParser.m */; };
557FCEB61203F938009FCDBA /* CoreServices.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 557FCEB51203F938009FCDBA /* CoreServices.framework */; };
557FD0431204A45D009FCDBA /* NSArray+CHCSVAdditions.m in Sources */ = {isa = PBXBuildFile; fileRef = 557FD0421204A45D009FCDBA /* NSArray+CHCSVAdditions.m */; };
Expand Down Expand Up @@ -37,6 +41,11 @@
08FB7796FE84155DC02AAC07 /* main.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = "<group>"; };
08FB779EFE84155DC02AAC07 /* Foundation.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Foundation.framework; path = /System/Library/Frameworks/Foundation.framework; sourceTree = "<absolute>"; };
32A70AAB03705E1F00C91783 /* CHCSVParser_Prefix.pch */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CHCSVParser_Prefix.pch; sourceTree = "<group>"; };
5516BCB312578CFC0025F235 /* NSString+CHCSVAdditions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = "NSString+CHCSVAdditions.h"; sourceTree = "<group>"; };
5516BCB412578CFC0025F235 /* NSString+CHCSVAdditions.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = "NSString+CHCSVAdditions.m"; sourceTree = "<group>"; };
5516BCB612578D480025F235 /* CHCSV.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CHCSV.h; sourceTree = "<group>"; };
5516BCB712578D750025F235 /* CHCSVSupport.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CHCSVSupport.h; sourceTree = "<group>"; };
5516BCB812578D750025F235 /* CHCSVSupport.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = CHCSVSupport.m; sourceTree = "<group>"; };
551981D41203715400FBE033 /* CHCSVParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CHCSVParser.h; sourceTree = "<group>"; };
551981D51203715400FBE033 /* CHCSVParser.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = CHCSVParser.m; sourceTree = "<group>"; };
551981EE1203800300FBE033 /* Test.csv */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = Test.csv; sourceTree = "<group>"; };
Expand Down Expand Up @@ -91,12 +100,7 @@
children = (
32A70AAB03705E1F00C91783 /* CHCSVParser_Prefix.pch */,
08FB7796FE84155DC02AAC07 /* main.m */,
551981D41203715400FBE033 /* CHCSVParser.h */,
551981D51203715400FBE033 /* CHCSVParser.m */,
55EFC7B71210608C0070B303 /* CHCSVWriter.h */,
55EFC7B81210608C0070B303 /* CHCSVWriter.m */,
557FD0411204A45D009FCDBA /* NSArray+CHCSVAdditions.h */,
557FD0421204A45D009FCDBA /* NSArray+CHCSVAdditions.m */,
5516BCBA12578DA90025F235 /* CHCSVParser */,
551981EE1203800300FBE033 /* Test.csv */,
557FD05A1204A72B009FCDBA /* UnitTests.h */,
557FD05B1204A72B009FCDBA /* UnitTests.m */,
Expand All @@ -121,6 +125,24 @@
name = Products;
sourceTree = "<group>";
};
5516BCBA12578DA90025F235 /* CHCSVParser */ = {
isa = PBXGroup;
children = (
5516BCB612578D480025F235 /* CHCSV.h */,
551981D41203715400FBE033 /* CHCSVParser.h */,
551981D51203715400FBE033 /* CHCSVParser.m */,
55EFC7B71210608C0070B303 /* CHCSVWriter.h */,
55EFC7B81210608C0070B303 /* CHCSVWriter.m */,
5516BCB712578D750025F235 /* CHCSVSupport.h */,
5516BCB812578D750025F235 /* CHCSVSupport.m */,
557FD0411204A45D009FCDBA /* NSArray+CHCSVAdditions.h */,
557FD0421204A45D009FCDBA /* NSArray+CHCSVAdditions.m */,
5516BCB312578CFC0025F235 /* NSString+CHCSVAdditions.h */,
5516BCB412578CFC0025F235 /* NSString+CHCSVAdditions.m */,
);
name = CHCSVParser;
sourceTree = "<group>";
};
C6859EA2029092E104C91782 /* Documentation */ = {
isa = PBXGroup;
children = (
Expand Down Expand Up @@ -175,7 +197,14 @@
isa = PBXProject;
buildConfigurationList = 1DEB927808733DD40010E9CD /* Build configuration list for PBXProject "CHCSVParser" */;
compatibilityVersion = "Xcode 3.1";
developmentRegion = English;
hasScannedForEncodings = 1;
knownRegions = (
English,
Japanese,
French,
German,
);
mainGroup = 08FB7794FE84155DC02AAC07 /* CHCSVParser */;
projectDirPath = "";
projectRoot = "";
Expand Down Expand Up @@ -222,6 +251,8 @@
557FD0591204A71C009FCDBA /* NSArray+CHCSVAdditions.m in Sources */,
557FD05D1204A731009FCDBA /* UnitTests.m in Sources */,
55EFC7B91210608C0070B303 /* CHCSVWriter.m in Sources */,
5516BCB512578CFC0025F235 /* NSString+CHCSVAdditions.m in Sources */,
5516BCB912578D750025F235 /* CHCSVSupport.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand All @@ -232,6 +263,8 @@
8DD76F9A0486AA7600D96B5E /* main.m in Sources */,
551981D61203715400FBE033 /* CHCSVParser.m in Sources */,
557FD0431204A45D009FCDBA /* NSArray+CHCSVAdditions.m in Sources */,
5516BCBB12578EA90025F235 /* NSString+CHCSVAdditions.m in Sources */,
5516BCBC12578EAD0025F235 /* CHCSVSupport.m in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
Expand Down
21 changes: 21 additions & 0 deletions CHCSVSupport.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//
// CHCSVSupport.h
// CHCSVParser
//
// Created by Dave DeLong on 10/2/10.
// Copyright 2010 Home. All rights reserved.
//

#import <Foundation/Foundation.h>
#import "CHCSVParser.h"

// CHCSVParserDelegate implementation that accumulates the parser's callbacks
// into an array of lines, where each line is an array of the fields reported
// for it. Any error reported by the parser is kept and exposed via `error`.
@interface NSArrayCHCSVAggregator : NSObject <CHCSVParserDelegate> {
// All completed lines; created when the parser reports the start of a document.
NSMutableArray * lines;
// Fields of the line currently being read; released and set to nil when the line ends.
NSMutableArray * currentLine;
// Error passed to parser:didFailWithError:, retained by the aggregator.
NSError * error;
}

// The accumulated lines (nil until a document has been started).
@property (readonly) NSArray * lines;
// The error reported by the parser, or nil if none was reported.
@property (readonly) NSError * error;

@end
47 changes: 47 additions & 0 deletions CHCSVSupport.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
//
// CHCSVSupport.m
// CHCSVParser
//
// Created by Dave DeLong on 10/2/10.
// Copyright 2010 Home. All rights reserved.
//

#import "CHCSVSupport.h"


// Delegate that collects everything a CHCSVParser reports into `lines`
// (an array of arrays of fields) and remembers any reported error.
//
// This file uses manual retain/release. Every ivar assignment below releases
// the previously owned object first, so reusing one aggregator for several
// parses (or receiving repeated callbacks) does not leak.
@implementation NSArrayCHCSVAggregator
@synthesize lines, error;

- (void) dealloc {
    [lines release];
    [currentLine release];
    [error release];
    [super dealloc];
}

// Starts a fresh result set for the document being parsed.
- (void) parser:(CHCSVParser *)parser didStartDocument:(NSString *)csvFile {
    // Drop the results of any previous document before allocating a new array.
    [lines release];
    lines = [[NSMutableArray alloc] init];
}

// Begins accumulating fields for a new line.
- (void) parser:(CHCSVParser *)parser didStartLine:(NSUInteger)lineNumber {
    // A previous line that never received didEndLine would otherwise leak here.
    [currentLine release];
    currentLine = [[NSMutableArray alloc] init];
}

// Commits the finished line to `lines` and gives up ownership of it.
- (void) parser:(CHCSVParser *)parser didEndLine:(NSUInteger)lineNumber {
    [lines addObject:currentLine];
    [currentLine release], currentLine = nil;
}

// Appends one parsed field to the line in progress.
- (void) parser:(CHCSVParser *)parser didReadField:(NSString *)field {
    [currentLine addObject:field];
}

// Nothing to finalize: `lines` already holds every completed line.
- (void) parser:(CHCSVParser *)parser didEndDocument:(NSString *)csvFile {

}

// Remembers the most recently reported error.
- (void) parser:(CHCSVParser *)parser didFailWithError:(NSError *)anError {
    // Retain the new value before releasing the old one so that being handed
    // the same object twice cannot deallocate it in between.
    [anError retain];
    [error release];
    error = anError;
}

@end
11 changes: 6 additions & 5 deletions CHCSVWriter.m
Original file line number Diff line number Diff line change
Expand Up @@ -105,18 +105,19 @@ - (void) closeFile {
[outputHandle release], outputHandle = nil;

if (atomically == YES && [handleFile isEqual:destinationFile] == NO) {
NSError *err = nil;
if ([[NSFileManager defaultManager] fileExistsAtPath:destinationFile]) {
NSError *err = nil;
[[NSFileManager defaultManager] removeItemAtPath:destinationFile error:&err];
if (err != nil) {
error = [err retain];
return;
}
[[NSFileManager defaultManager] moveItemAtPath:handleFile toPath:destinationFile error:&err];
if (err != nil) {
error = [err retain];
}
}
[[NSFileManager defaultManager] moveItemAtPath:handleFile toPath:destinationFile error:&err];
if (err != nil) {
error = [err retain];
}
[[NSFileManager defaultManager] removeItemAtPath:handleFile error:nil];
}
}
}
Expand Down
Loading

0 comments on commit 7cba09d

Please sign in to comment.