Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add replacement into json output #2883

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/core/flags/hiargs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,7 @@ impl HiArgs {
.pretty(false)
.max_matches(self.max_count)
.always_begin_end(false)
.replacement(self.replace.clone().map(|r| r.into()))
.build(wtr)
}

Expand Down
112 changes: 104 additions & 8 deletions crates/printer/src/json.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::{
io::{self, Write},
path::Path,
sync::Arc,
time::Instant,
};

Expand All @@ -13,7 +14,8 @@ use {
};

use crate::{
counter::CounterWriter, jsont, stats::Stats, util::find_iter_at_in_context,
counter::CounterWriter, jsont, stats::Stats,
util::find_iter_at_in_context, util::Replacer,
};

/// The configuration for the JSON printer.
Expand All @@ -26,11 +28,17 @@ struct Config {
pretty: bool,
max_matches: Option<u64>,
always_begin_end: bool,
replacement: Arc<Option<Vec<u8>>>,
}

impl Default for Config {
fn default() -> Config {
Config { pretty: false, max_matches: None, always_begin_end: false }
Config {
pretty: false,
max_matches: None,
always_begin_end: false,
replacement: Arc::new(None),
}
}
}

Expand Down Expand Up @@ -98,6 +106,24 @@ impl JSONBuilder {
self.config.always_begin_end = yes;
self
}

/// Set the bytes that will be used to replace each occurrence of a match
/// found.
///
/// The replacement bytes given may include references to capturing groups,
/// which may either be in index form (e.g., `$2`) or can reference named
/// capturing groups if present in the original pattern (e.g., `$foo`).
///
/// For documentation on the full format, please see the `Capture` trait's
/// `interpolate` method in the
/// [grep-printer](https://docs.rs/grep-printer) crate.
pub fn replacement(
&mut self,
replacement: Option<Vec<u8>>,
) -> &mut JSONBuilder {
self.config.replacement = Arc::new(replacement);
self
}
}

/// The JSON printer, which emits results in a JSON lines format.
Expand Down Expand Up @@ -256,7 +282,8 @@ impl JSONBuilder {
/// encoded, then the byte offsets correspond to the data after base64
/// decoding.) The `submatch` objects are guaranteed to be sorted by their
/// starting offsets. Note that it is possible for this array to be empty,
/// for example, when searching reports inverted matches.
/// for example, when searching reports inverted matches. If the configuration
/// specifies a replacement, the resulting replacement text is also present.
///
/// #### Message: **context**
///
Expand Down Expand Up @@ -286,7 +313,9 @@ impl JSONBuilder {
/// decoding.) The `submatch` objects are guaranteed to be sorted by
/// their starting offsets. Note that it is possible for this array to be
/// non-empty, for example, when searching reports inverted matches such that
/// the original matcher could match things in the contextual lines.
/// the original matcher could match things in the contextual lines. If the
/// configuration specifies a replacemement, the resulting replacement text
/// is also present.
///
/// #### Object: **submatch**
///
Expand All @@ -308,6 +337,10 @@ impl JSONBuilder {
/// the `lines` field in the
/// [`match`](#message-match) or [`context`](#message-context)
/// messages.
/// * **replacement** (optional) - An
/// [arbitrary data object](#object-arbitrary-data) corresponding to the
/// replacement text for this submatch, if the configuration specifies
/// a replacement.
///
/// #### Object: **stats**
///
Expand Down Expand Up @@ -447,6 +480,23 @@ impl JSONBuilder {
/// }
/// }
/// ```
/// and here's what a match type item would looks like if a replacement text
/// of 'Moriarity' was given as a parameter:
/// ```json
/// {
/// "type": "match",
/// "data": {
/// "path": {"text": "/home/andrew/sherlock"},
/// "lines": {"text": "For the Doctor Watsons of this world, as opposed to the Sherlock\n"},
/// "line_number": 1,
/// "absolute_offset": 0,
/// "submatches": [
/// {"match": {"text": "Watson"}, "replacement": {"text": "Moriarity"}, "start": 15, "end": 21}
/// ]
/// }
/// }
/// ```

#[derive(Clone, Debug)]
pub struct JSON<W> {
config: Config,
Expand All @@ -471,6 +521,7 @@ impl<W: io::Write> JSON<W> {
) -> JSONSink<'static, 's, M, W> {
JSONSink {
matcher,
replacer: Replacer::new(),
json: self,
path: None,
start_time: Instant::now(),
Expand All @@ -497,6 +548,7 @@ impl<W: io::Write> JSON<W> {
{
JSONSink {
matcher,
replacer: Replacer::new(),
json: self,
path: Some(path.as_ref()),
start_time: Instant::now(),
Expand Down Expand Up @@ -559,6 +611,7 @@ impl<W> JSON<W> {
#[derive(Debug)]
pub struct JSONSink<'p, 's, M: Matcher, W> {
matcher: M,
replacer: Replacer<M>,
json: &'s mut JSON<W>,
path: Option<&'p Path>,
start_time: Instant,
Expand Down Expand Up @@ -643,6 +696,31 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
Ok(())
}

/// If the configuration specifies a replacement, then this executes the
/// replacement, lazily allocating memory if necessary.
///
/// To access the result of a replacement, use `replacer.replacement()`.
fn replace(
&mut self,
searcher: &Searcher,
bytes: &[u8],
range: std::ops::Range<usize>,
) -> io::Result<()> {
self.replacer.clear();
if self.json.config.replacement.is_some() {
let replacement =
(*self.json.config.replacement).as_ref().map(|r| &*r).unwrap();
self.replacer.replace_all(
searcher,
&self.matcher,
bytes,
range,
replacement,
)?;
}
Ok(())
}

/// Returns true if this printer should quit.
///
/// This implements the logic for handling quitting after seeing a certain
Expand Down Expand Up @@ -711,10 +789,15 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
mat.buffer(),
mat.bytes_range_in_buffer(),
)?;
self.replace(searcher, mat.buffer(), mat.bytes_range_in_buffer())?;
self.stats.add_matches(self.json.matches.len() as u64);
self.stats.add_matched_lines(mat.lines().count() as u64);

let submatches = SubMatches::new(mat.bytes(), &self.json.matches);
let submatches = SubMatches::new(
mat.bytes(),
&self.json.matches,
self.replacer.replacement(),
);
let msg = jsont::Message::Match(jsont::Match {
path: self.path,
lines: mat.bytes(),
Expand All @@ -740,7 +823,12 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
}
let submatches = if searcher.invert_match() {
self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
SubMatches::new(ctx.bytes(), &self.json.matches)
self.replace(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
SubMatches::new(
ctx.bytes(),
&self.json.matches,
self.replacer.replacement(),
)
} else {
SubMatches::empty()
};
Expand Down Expand Up @@ -831,19 +919,27 @@ enum SubMatches<'a> {
impl<'a> SubMatches<'a> {
/// Create a new set of match ranges from a set of matches and the
/// corresponding bytes that those matches apply to.
fn new(bytes: &'a [u8], matches: &[Match]) -> SubMatches<'a> {
fn new(
bytes: &'a [u8],
matches: &[Match],
replacement: Option<(&'a [u8], &'a [Match])>,
) -> SubMatches<'a> {
if matches.len() == 1 {
let mat = matches[0];
SubMatches::Small([jsont::SubMatch {
m: &bytes[mat],
replacement: replacement
.map(|(rbuf, rmatches)| &rbuf[rmatches[0]]),
start: mat.start(),
end: mat.end(),
}])
} else {
let mut match_ranges = vec![];
for &mat in matches {
for (i, &mat) in matches.iter().enumerate() {
match_ranges.push(jsont::SubMatch {
m: &bytes[mat],
replacement: replacement
.map(|(rbuf, rmatches)| &rbuf[rmatches[i]]),
start: mat.start(),
end: mat.end(),
});
Expand Down
4 changes: 4 additions & 0 deletions crates/printer/src/jsont.rs
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ impl<'a> serde::Serialize for Context<'a> {

pub(crate) struct SubMatch<'a> {
pub(crate) m: &'a [u8],
pub(crate) replacement: Option<&'a [u8]>,
pub(crate) start: usize,
pub(crate) end: usize,
}
Expand All @@ -148,6 +149,9 @@ impl<'a> serde::Serialize for SubMatch<'a> {

let mut state = s.serialize_struct("SubMatch", 3)?;
state.serialize_field("match", &Data::from_bytes(self.m))?;
if let Some(r) = self.replacement {
state.serialize_field("replacement", &Data::from_bytes(r))?;
}
state.serialize_field("start", &self.start)?;
state.serialize_field("end", &self.end)?;
state.end()
Expand Down