Skip to content

Commit

Permalink
Add indexing of iRODS paths to homepage
Browse files Browse the repository at this point in the history
Paths tagged in iRODS with specific metadata are indexed automatically
and presented on the homepage.
  • Loading branch information
kjsanger committed Apr 11, 2024
1 parent 4ae115f commit 40e8d51
Show file tree
Hide file tree
Showing 11 changed files with 694 additions and 138 deletions.
27 changes: 20 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,35 @@ Usage:
sqyrrl start [flags]

Flags:
--cert-file string Path to the SSL certificate file
-h, --help help for start
--host string Address on which to listen, host part (default "localhost")
--irods-env string Path to the iRODS environment file (default "~/.irods/irods_environment.json")
--key-file string Path to the SSL private key file
--port int Port on which to listen (default 3333)
--cert-file string Path to the SSL certificate file
-h, --help help for start
--host string Address on which to listen, host part (default "localhost")
--index-interval duration Interval at which update the index (default 1m0s)
--irods-env string Path to the iRODS environment file (default "/Users/kdj/.irods/irods_environment.json")
--key-file string Path to the SSL private key file
--port string Port on which to listen (default "3333")

Global Flags:
--log-level string Set the log level (trace, debug, info, warn, error) (default "info")

```

To stop the server, send `SIGINT` or `SIGTERM` to the process. The server will wait for
active connections to close before shutting down.

For additional options, use the `--help` flag.

## Tagging iRODS data objects for display on the home page

This is an experimental feature. It allows the user to tag iRODS data objects with metadata so that they will
be displayed in the Sqyrrl home page for convenience. To tag an iRODS data object, add a metadata attribute
`sqyrrl:index` with value `1`. Data objects may be grouped together on the page, under a title, known as a
"category". To specify a category for a data object, add a metadata attribute `sqyrrl:category` with the
value being the category name.

The home page will be re-indexed at the interval specified by the `--index-interval` flag. The home page
auto-refreshes every 30 seconds.

## Dependencies

Sqyrrl uses [go-irodsclient](https://github.com/cyverse/go-irodsclient) to connect to iRODS.
Sqyrrl uses [go-irodsclient](https://github.com/cyverse/go-irodsclient) to connect to iRODS.
22 changes: 14 additions & 8 deletions cmd/sqyrrl.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,9 @@ type cliFlags struct {

host string // Address to listen on, host part
level string // Logging level
port int // Port to listen on
port string // Port to listen on

indexInterval time.Duration // Interval to index files
}

var cliFlagsSelected = cliFlags{
Expand Down Expand Up @@ -117,11 +119,12 @@ func startServer(cmd *cobra.Command, args []string) {
logger := configureRootLogger(&cliFlagsSelected)

server.ConfigureAndStart(logger, server.Config{
Host: cliFlagsSelected.host,
Port: cliFlagsSelected.port,
CertFilePath: cliFlagsSelected.certFilePath,
KeyFilePath: cliFlagsSelected.keyFilePath,
EnvFilePath: cliFlagsSelected.envFilePath,
Host: cliFlagsSelected.host,
Port: cliFlagsSelected.port,
CertFilePath: cliFlagsSelected.certFilePath,
KeyFilePath: cliFlagsSelected.keyFilePath,
EnvFilePath: cliFlagsSelected.envFilePath,
IndexInterval: cliFlagsSelected.indexInterval,
})
}

Expand All @@ -147,8 +150,8 @@ func CLI() {
startCmd.Flags().StringVar(&cliFlagsSelected.host,
"host", "localhost",
"Address on which to listen, host part")
startCmd.Flags().IntVar(&cliFlagsSelected.port,
"port", 3333,
startCmd.Flags().StringVar(&cliFlagsSelected.port,
"port", "3333",
"Port on which to listen")
startCmd.Flags().StringVar(&cliFlagsSelected.certFilePath,
"cert-file", "",
Expand All @@ -159,6 +162,9 @@ func CLI() {
startCmd.Flags().StringVar(&cliFlagsSelected.envFilePath,
"irods-env", server.IRODSEnvFilePath(),
"Path to the iRODS environment file")
startCmd.Flags().DurationVar(&cliFlagsSelected.indexInterval,
"index-interval", server.DefaultIndexInterval,
"Interval at which update the index")

rootCmd.AddCommand(startCmd)

Expand Down
113 changes: 108 additions & 5 deletions server/handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,16 +18,29 @@
package server

import (
"context"
"github.com/rs/xid"
"github.com/rs/zerolog/hlog"
"io/fs"
"net/http"
"path"
"time"

"github.com/cyverse/go-irodsclient/irods/types"
"github.com/rs/zerolog"
)

// HandlerChain is a function that takes an http.Handler and returns a new http.Handler
// wrapping the input handler. Each handler in the chain should process the request in
// some way, and then call the next handler. Ideally, the functionality of each handler
// should be orthogonal to the others.
//
// This is sometimes called "middleware" in Go. I haven't used that term here because it
// already has an established meaning in the context of operating systems and networking.
type HandlerChain func(http.Handler) http.Handler

// HandleHomePage is a handler for the static home page.
func HandleHomePage(logger zerolog.Logger) http.Handler {
func HandleHomePage(logger zerolog.Logger, index *ItemIndex) http.Handler {
return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
logger.Trace().Msg("HomeHandler called")

Expand All @@ -42,12 +55,23 @@ func HandleHomePage(logger zerolog.Logger) http.Handler {
http.Redirect(w, r, redirect, http.StatusPermanentRedirect)
}

type customData struct {
URL string
Version string
type pageData struct {
Version string
Categories []string
CategorisedItems map[string][]Item
}

data := customData{Version: Version, URL: r.URL.RequestURI()}
catItems := make(map[string][]Item)
cats := index.Categories()
for _, cat := range cats {
catItems[cat] = index.ItemsInCategory(cat)
}

data := pageData{
Version: Version,
Categories: cats,
CategorisedItems: catItems,
}

tplName := "home.gohtml"
if err := templates.ExecuteTemplate(w, tplName, data); err != nil {
Expand All @@ -59,6 +83,8 @@ func HandleHomePage(logger zerolog.Logger) http.Handler {
}

func HandleStaticContent(logger zerolog.Logger) http.Handler {
logger.Trace().Msg("StaticContentHandler called")

sub := func(dir fs.FS, name string) fs.FS {
f, err := fs.Sub(dir, name)
if err != nil {
Expand Down Expand Up @@ -100,3 +126,80 @@ func HandleIRODSGet(logger zerolog.Logger, account *types.IRODSAccount) http.Han
getFileRange(rodsLogger, w, r, account, sanPath)
})
}

// AddRequestLogger adds an HTTP request logger to the handler chain.
//
// The returned HandlerChain wraps next in an hlog access handler whose callback logs,
// at Info level, the duration, response size, status, method, URL, remote address,
// forwarded-for header and user agent of the request. If a correlation ID is present
// in the request context, it is logged too; otherwise the field is logged empty.
func AddRequestLogger(logger zerolog.Logger) HandlerChain {
	return func(next http.Handler) http.Handler {
		lh := hlog.NewHandler(logger)

		ah := hlog.AccessHandler(func(r *http.Request, status, size int, dur time.Duration) {
			// The comma-ok form leaves corrID empty, rather than panicking, when the
			// context holds no correlation ID or holds a value that is not a string.
			corrID, _ := r.Context().Value(correlationIDKey).(string)

			hlog.FromRequest(r).Info().
				Str("correlation_id", corrID).
				Dur("duration", dur).
				Int("size", size).
				Int("status", status).
				Str("method", r.Method).
				Str("url", r.URL.RequestURI()).
				Str("remote_addr", r.RemoteAddr).
				Str("forwarded_for", r.Header.Get(HeaderForwardedFor)).
				Str("user_agent", r.UserAgent()).
				Msg("Request served")
		})
		return lh(ah(next))
	}
}

// AddCorrelationID makes a correlation ID available to the rest of the handler chain.
//
// If the request carries a HeaderCorrelationID header, its value is used as-is.
// Otherwise a fresh ID is generated and also added to the response headers. In both
// cases the ID is stored in the request context under correlationIDKey before the next
// handler is called.
func AddCorrelationID(logger zerolog.Logger) HandlerChain {
	return func(next http.Handler) http.Handler {
		withID := func(w http.ResponseWriter, r *http.Request) {
			id := r.Header.Get(HeaderCorrelationID)
			msg := "Using correlation ID from request"

			if id == "" {
				// Nothing supplied by the client: mint an ID and report it back.
				id = xid.New().String()
				w.Header().Add(HeaderCorrelationID, id)
				msg = "Creating a new correlation ID"
			}

			logger.Trace().
				Str("correlation_id", id).
				Str("url", r.URL.RequestURI()).
				Msg(msg)

			reqCtx := context.WithValue(r.Context(), correlationIDKey, id)
			next.ServeHTTP(w, r.WithContext(reqCtx))
		}

		return http.HandlerFunc(withID)
	}
}

// SanitiseRequestURL adds a handler to the chain that passes the request URL path
// through userInputPolicy before the next handler sees it. A warning is logged whenever
// sanitising changes the path.
func SanitiseRequestURL(logger zerolog.Logger) HandlerChain {
	return func(next http.Handler) http.Handler {
		sanitise := func(w http.ResponseWriter, r *http.Request) {
			// URLs are already cleaned by the Go ServeMux. This is in addition to that.
			original := r.URL.Path
			cleaned := userInputPolicy.Sanitize(original)

			if cleaned != original {
				logger.Warn().
					Str("sanitised_path", cleaned).
					Str("dirty_path", original).
					Msg("Path was sanitised")
			}

			// r.URL is a pointer, so this updates the request's URL in place.
			r.URL.Path = cleaned

			next.ServeHTTP(w, r)
		}

		return http.HandlerFunc(sanitise)
	}
}
155 changes: 155 additions & 0 deletions server/index.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/*
* Copyright (C) 2024. Genome Research Ltd. All rights reserved.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License,
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package server

import (
"fmt"
"slices"
"sort"
"strings"

"github.com/cyverse/go-irodsclient/irods/types"
)

// ItemIndex is a collection of iRODS paths that have been tagged for presentation on
// the Sqyrrl web interface.
//
// The index is updated by the server as new items are added to the system. Each item
// has a category, which is used to group items together. The functions associated with
// the index allow the web interface to display the items in a structured way.
type ItemIndex struct {
	items []Item // The indexed items, in the order they were supplied to NewItemIndex.
}

// Item represents an item in the index.
type Item struct {
	Path     string              // The iRODS path of the item.
	Size     int64               // The size of the item in bytes.
	Metadata []*types.IRODSMeta  // The metadata of the item; Category reads the category from here.
	ACL      []*types.IRODSAccess // The access control entries of the item (user, zone, access level).
}

// NewItemIndex creates an ItemIndex holding the given items. The slice is stored as
// passed, without being copied.
func NewItemIndex(items []Item) *ItemIndex {
	index := new(ItemIndex)
	index.items = items

	return index
}

// Categories returns the distinct categories of the items in the index, sorted in
// ascending order. The result is empty, but not nil, when the index has no items.
func (index *ItemIndex) Categories() []string {
	seen := make(map[string]struct{})
	for i := range index.items {
		seen[index.items[i].Category()] = struct{}{}
	}

	names := make([]string, 0, len(seen))
	for name := range seen {
		names = append(names, name)
	}
	// Map iteration order is random, so sort to give a stable result.
	slices.Sort(names)

	return names
}

// ItemsInCategory returns the items in the index whose category equals cat, sorted by
// path in ascending order. Items with equal paths keep their index order. The result is
// nil when no item matches.
func (index *ItemIndex) ItemsInCategory(cat string) []Item {
	var matched []Item
	for i := range index.items {
		if candidate := index.items[i]; candidate.Category() == cat {
			matched = append(matched, candidate)
		}
	}

	byPath := func(a, b int) bool {
		return matched[a].Path < matched[b].Path
	}
	sort.SliceStable(matched, byPath)

	return matched
}

// String returns a textual representation of the index: each category, in sorted order,
// followed by a comma-separated list of the string forms of its items.
func (index *ItemIndex) String() string {
	var out strings.Builder
	out.WriteString("<ItemIndex")

	for _, cat := range index.Categories() {
		members := index.ItemsInCategory(cat)

		rendered := make([]string, len(members))
		for i := range members {
			rendered[i] = members[i].String()
		}

		out.WriteString(" ")
		out.WriteString(cat)
		out.WriteString(": ")
		out.WriteString(strings.Join(rendered, ", "))
	}
	out.WriteString(">")

	return out.String()
}

// Category returns the value of the first metadata entry of the item whose name is
// CategoryAttr, or the empty string if there is no such entry.
func (item *Item) Category() string {
	for i := range item.Metadata {
		if avu := item.Metadata[i]; avu.Name == CategoryAttr {
			return avu.Value
		}
	}

	return ""
}

// SizeString returns the size of the item as a string for display. Sizes below 1024 are
// given in bytes ("B"); all other sizes are given in whole kibibytes ("KiB"), rounded
// down.
func (item *Item) SizeString() string {
	switch {
	case item.Size < 1024:
		return fmt.Sprintf("%d B", item.Size)
	default:
		return fmt.Sprintf("%d KiB", item.Size/1024)
	}
}

// MetadataStrings returns the metadata of the item as "name=value" strings, sorted in
// ascending order. The result is nil when the item has no metadata.
func (item *Item) MetadataStrings() []string {
	var pairs []string
	for i := range item.Metadata {
		avu := item.Metadata[i]
		pairs = append(pairs, fmt.Sprintf("%s=%s", avu.Name, avu.Value))
	}
	slices.Sort(pairs)

	return pairs
}

// ACLStrings returns the ACL of the item as "user#zone:level" strings, sorted in
// ascending order. The result is nil when the item has no ACL entries.
func (item *Item) ACLStrings() []string {
	var entries []string
	for i := range item.ACL {
		access := item.ACL[i]
		entries = append(entries,
			fmt.Sprintf("%s#%s:%s", access.UserName, access.UserZone, access.AccessLevel))
	}
	slices.Sort(entries)

	return entries
}

// String returns a textual representation of the item, giving its path, category, size,
// ACL and metadata. The ACL and metadata entries are each joined with ", ".
func (item *Item) String() string {
	acl := strings.Join(item.ACLStrings(), ", ")
	metadata := strings.Join(item.MetadataStrings(), ", ")

	return fmt.Sprintf("<Item path='%s' category='%s' size:%d acl:[%s] metadata:[%s]>",
		item.Path, item.Category(), item.Size, acl, metadata)
}
Loading

0 comments on commit 40e8d51

Please sign in to comment.