Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TSV parsing #23

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
notes.txt
notes.lhs
dist
.cabal-sandbox
cabal.sandbox.config
cabal.config

# emacs stuff
*~
\#*\#
/.emacs.desktop
/.emacs.desktop.lock
*.elc
auto-save-list
tramp
.\#*

# Org-mode
.org-id-locations
*_archive

# flymake-mode
*_flymake.*

# eshell files
/eshell/history
/eshell/lastdir

# elpa packages
/elpa/

# vim stuff
*.swp
*.swo

*.key
_darcs
darcs*
1 change: 1 addition & 0 deletions MissingH.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ Library
Control.Concurrent.Thread.Utils,
Network.Email.Sendmail,
Data.CSV,
Data.TSV,
Data.SeperatingValues.SeperatingValues,
System.Cmd.Utils,
Data.BinPacking,
Data.Progress.Tracker,
Expand Down
2 changes: 2 additions & 0 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,8 @@ MissingH.Str * Leading/trailing whitespace removal

MissingH.Str.CSV * Parsing of comma-separated value (CSV) files

MissingH.Str.TSV * Parsing of tab-separated value (TSV) files

MissingH.Threads * Threaded callbacks

MissingH.Time * Utilities for working with times and dates
Expand Down
35 changes: 5 additions & 30 deletions src/Data/CSV.hs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{- arch-tag: CSV and TSV utilities
{- arch-tag: CSV utilities
Copyright (c) 2005-2011 John Goerzen <[email protected]>

All rights reserved.
Expand All @@ -22,28 +22,13 @@ Written by John Goerzen, jgoerzen\@complete.org

module Data.CSV (csvFile, genCsvFile) where
import Text.ParserCombinators.Parsec
import Data.List (intersperse)

eol :: forall st. GenParser Char st String
eol = (try $ string "\n\r") <|> (try $ string "\r\n") <|> string "\n" <|>
string "\r" <?> "End of line"
import Data.SeperatingValues.SeperatingValues

cell :: GenParser Char st String
cell = quotedcell <|> many (noneOf ",\n\r")

quotedchar :: GenParser Char st Char
quotedchar = noneOf "\""
<|> (try $ do string "\"\""
return '"'
)
quotedcell :: CharParser st String
quotedcell = do char '"'
content <- many quotedchar
char '"'
return content
cell = cellOfX ','

line :: GenParser Char st [String]
line = sepBy cell (char ',')
line = lineOfX ','

{- | Parse a Comma-Separated Value (CSV) file. The return value is a list of
lines; each line is a list of cells; and each cell is a String.
Expand Down Expand Up @@ -89,14 +74,4 @@ csvFile = endBy line eol
{- | Generate CSV data for a file. The resulting string can be
written out to disk directly. -}
genCsvFile :: [[String]] -> String
genCsvFile inp =
unlines . map csvline $ inp
where csvline :: [String] -> String
csvline l = concat . intersperse "," . map csvcells $ l
csvcells :: String -> String
csvcells "" = ""
csvcells c = '"' : convcell c ++ "\""
convcell :: String -> String
convcell c = concatMap convchar c
convchar '"' = "\"\""
convchar x = [x]
genCsvFile inp = genXsvFile "," inp
52 changes: 52 additions & 0 deletions src/Data/SeperatingValues/SeperatingValues.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
{- |
Module : Data.SeperatingValues.SeperatingValues
Copyright : Copyright (C) 2005-2011 John Goerzen
License : BSD3

Maintainer : John Goerzen <[email protected]>
Stability : provisional
Portability: portable

Misc/Helper Haskell Parsec parsers for any(X)-separated values (XSV) files.

Written by: Aistis Raulinaitis, [email protected]
-}

module Data.SeperatingValues.SeperatingValues(eol, cellOfX, quotedchar, lineOfX, genXsvFile) where

import Text.ParserCombinators.Parsec
import Data.List (intersperse)

-- | Match a single line terminator and return the characters consumed.
--
-- The two-character forms (@\"\\n\\r\"@ and @\"\\r\\n\"@) are attempted
-- before the single-character ones so that a two-character terminator is
-- consumed as one line ending rather than two. Failures are reported
-- under the label \"End of line\".
eol :: forall st. GenParser Char st String
eol = choice
    [ try (string "\n\r")
    , try (string "\r\n")
    , string "\n"
    , string "\r"
    ] <?> "End of line"

-- | Parse one cell of a file whose cells are separated by the character @x@.
--
-- A cell is either a double-quoted cell or a (possibly empty) run of
-- characters that are not the separator, @\'\\n\'@ or @\'\\r\'@.
cellOfX :: Char -> GenParser Char st String
cellOfX x = quotedcell <|> barecell
  where
    -- Unquoted text stops at the separator or at any line-break character.
    barecell = many (noneOf stoppers)
    stoppers = [x, '\n', '\r']

-- | Parse one character from the inside of a quoted cell.
--
-- Any character other than a double quote stands for itself; a pair of
-- double quotes stands for one literal double quote. The pair is wrapped
-- in 'try' so that a lone closing quote is left unconsumed.
quotedchar :: GenParser Char st Char
quotedchar = ordinary <|> escapedQuote
  where
    ordinary     = noneOf ['"']
    escapedQuote = try (string "\"\"" >> return '"')
-- | Parse a cell enclosed in double quotes and return its contents with
-- the surrounding quotes removed (doubled quotes are decoded by
-- 'quotedchar').
quotedcell :: CharParser st String
quotedcell = between quote quote (many quotedchar)
  where
    quote = char '"'

-- | Parse one line of cells separated by the character @x@.
--
-- The line terminator itself is not consumed here; callers combine this
-- parser with 'eol'.
lineOfX :: Char -> GenParser Char st [String]
lineOfX x = field `sepBy` delimiter
  where
    field     = cellOfX x
    delimiter = char x

-- | Render rows of cells as separated-value text.
--
-- @x@ is the separator string placed between the cells of a row and @inp@
-- is the list of rows. Every row is terminated by a newline. An empty
-- cell is written as nothing at all; any other cell is wrapped in double
-- quotes, with each embedded double quote doubled.
genXsvFile :: String -> [[String]] -> String
genXsvFile x inp = unlines [renderRow row | row <- inp]
  where
    renderRow :: [String] -> String
    renderRow row = concat (intersperse x (map renderCell row))

    renderCell :: String -> String
    renderCell field
      | null field = field
      | otherwise  = "\"" ++ concatMap escape field ++ "\""

    -- A double quote inside a cell is written twice; everything else is
    -- copied through unchanged.
    escape :: Char -> String
    escape ch
      | ch == '"' = [ch, ch]
      | otherwise = [ch]
70 changes: 70 additions & 0 deletions src/Data/TSV.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{- |
Module : Data.TSV
Copyright : Copyright (C) 2005-2011 John Goerzen
License : BSD3

Maintainer : John Goerzen <[email protected]>
Stability : provisional
Portability: portable

Haskell Parsec parsers for tab-separated value (TSV) files.

Written by: Aistis Raulinaitis, [email protected]
-}
module Data.TSV where

import Text.ParserCombinators.Parsec
import Data.SeperatingValues.SeperatingValues

-- | Parser for a single TSV cell: 'cellOfX' specialised to the tab
-- separator.
cell :: GenParser Char st String
cell = cellOfX tab
  where
    tab = '\t'

-- | Parser for one TSV line (without its terminator): 'lineOfX'
-- specialised to the tab separator.
line :: GenParser Char st [String]
line = lineOfX tab
  where
    tab = '\t'

{- | Parse a Tab-Separated Value (TSV) file. The return value is a list of
lines; each line is a list of cells; and each cell is a String.

Please note that TSV files may have a different number of cells on each line.
Also, it is impossible to distinguish a TSV line that has a cell with no data
from a TSV line that has no cells.

Here are some examples:

>Input (literal strings) Parses As (Haskell String syntax)
>-------------------------------- ---------------------------------

>1 2 3 [["1", "2", "3"]]
>
>l1 [["l1"], ["l2"]]
>l2
>
> (empty line) [[""]]
>
>NQ "Quoted" [["NQ", "Quoted"]]
>
>NQ "Embedded""Quote" [["NQ", "Embedded\"Quote"]]

To parse a String, you might use:

>import Text.ParserCombinators.Parsec
>import Data.TSV
>....
>parse tsvFile "" mystring

To parse a file, you might instead use:

>do result <- parseFromFile tsvFile "/path/to/file"

Please note that the result of parsing will be of type
(Either ParseError [[String]]). A Left result indicates an error.
For more details, see the Parsec information.
-}

tsvFile :: CharParser st [[String]]
-- Each row, the last one included, has to be followed by a line terminator.
tsvFile = line `endBy` eol

{- | Render a list of rows (each a list of cells) as TSV text by handing
the rows to 'genXsvFile' with a tab as the separator.  The result is a
plain String that can be written to a file as-is. -}
genTsvFile :: [[String]] -> String
genTsvFile = genXsvFile "\t"
33 changes: 33 additions & 0 deletions testsrc/Str/TSVtest.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{- arch-tag: TSV tests main file
Copyright (C) 2005-2011 John Goerzen <[email protected]>

All rights reserved.

For license and copyright information, see the file LICENSE

-}

module Str.TSVtest(tests) where
import Test.HUnit
import Data.TSV (tsvFile)
import Text.ParserCombinators.Parsec

-- | Parser checks for 'tsvFile'.
--
-- Each case feeds one input string to the parser and compares the outcome
-- with the expected rows; a parse error is rendered with 'show' so both
-- sides have the same type.  The input string doubles as the test label.
--
-- Cell separators are spelled as explicit @\\t@ escapes so that they
-- cannot be confused with (or silently converted to) ordinary spaces.
test_tsv :: [Test]
test_tsv =
    let f inp expected = TestLabel inp $ TestCase $
            expected @=? either (Left . show) Right (parse tsvFile "" inp)
    in [
        f "" (Right []),
        f "\n" (Right [[""]]),
        f "1\t2\t3\n" (Right [["1", "2", "3"]]),
        -- Spaces are ordinary cell content; only tabs split cells.
        f "This is a\tTest\tReally\n" (Right [["This is a", "Test", "Really"]]),
        f "l1\nl2\n" (Right [["l1"], ["l2"]]),
        f "NQ\t\"Quoted\"\n" (Right [["NQ", "Quoted"]]),
        f "1Q\t\"\"\"\"\n" (Right [["1Q", "\""]]),
        f "\t\"\"\n" (Right [["", ""]]),
        f "\"Embedded\"\"Quote\"\n" (Right [["Embedded\"Quote"]])
       ]

-- | Entry point imported by the top-level test driver as @Str.TSVtest.tests@.
tests :: Test
tests = TestList [TestLabel "tsv" (TestList test_tsv)]

2 changes: 2 additions & 0 deletions testsrc/Tests.hs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import qualified HVIOtest
import qualified HVFStest
import qualified Timetest
import qualified Str.CSVtest
import qualified Str.TSVtest
import qualified WildMatchtest
import qualified Globtest
import qualified ProgressTrackertest
Expand All @@ -33,6 +34,7 @@ tests = TestList [TestLabel "test1" test1,
TestLabel "List" Listtest.tests,
TestLabel "Str" Strtest.tests,
TestLabel "CSV" Str.CSVtest.tests,
TestLabel "TSV" Str.TSVtest.tests,
TestLabel "Time" Timetest.tests,
TestLabel "Map" Maptest.tests,
TestLabel "ProgressTracker" ProgressTrackertest.tests,
Expand Down