Skip to content

Commit

Permalink
SF-3172 Improve editor xml tag stripping (#2974)
Browse files Browse the repository at this point in the history
  • Loading branch information
siltomato authored Jan 28, 2025
1 parent a5e77ed commit f7aada1
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ import { NoticeService } from 'xforge-common/notice.service';
import { OnlineStatusService } from 'xforge-common/online-status.service';
import { UserService } from 'xforge-common/user.service';
import { filterNullish } from 'xforge-common/util/rxjs-util';
import { stripHtml } from 'xforge-common/util/string-util';
import { browserLinks, getLinkHTML, isBlink, issuesEmailTemplate, objectId } from 'xforge-common/utils';
import { XFValidators } from 'xforge-common/xfvalidators';
import { environment } from '../../../environments/environment';
Expand Down Expand Up @@ -1208,7 +1209,7 @@ export class EditorComponent extends DataLoadingComponent implements OnDestroy,

// Show the copyright notice
this.dialogService.openGenericDialog({
message: of(this.stripXml(copyrightNotice)),
message: of(stripHtml(copyrightNotice)),
options: [{ value: undefined, label: this.i18n.translate('dialog.close'), highlight: true }]
});
}
Expand Down Expand Up @@ -2034,7 +2035,7 @@ export class EditorComponent extends DataLoadingComponent implements OnDestroy,
/** Gets the information needed to format a particular featured verse. */
private getFeaturedVerseRefInfo(threadDoc: NoteThreadDoc): FeaturedVerseRefInfo | undefined {
const notes: Note[] = threadDoc.notesInOrderClone(threadDoc.data!.notes);
let preview: string = notes[0].content != null ? this.stripXml(notes[0].content.trim()) : '';
let preview: string = notes[0].content != null ? stripHtml(notes[0].content.trim()) : '';
if (notes.length > 1) {
preview += '\n' + translate('editor.more_notes', { count: notes.length - 1 });
}
Expand All @@ -2061,10 +2062,6 @@ export class EditorComponent extends DataLoadingComponent implements OnDestroy,
};
}

private stripXml(xmlContent: string): string {
return xmlContent.replace(/<[^>]+>/g, '');
}

/** Update the text anchors for the note threads in the current segment. */
private async updateVerseNoteThreadAnchors(affectedEmbeds: EmbedsByVerse[], delta: Delta): Promise<void> {
if (this.target == null || this.noteThreadQuery == null || this.noteThreadQuery.docs.length < 1) {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { areStringArraysEqual } from './string-util';
import { areStringArraysEqual, stripHtml } from './string-util';

describe('areStringArraysEqual', () => {
it('should return true if two empty arrays are compared', () => {
Expand All @@ -17,3 +17,61 @@ describe('areStringArraysEqual', () => {
expect(areStringArraysEqual(['a', 'b', 'c'], ['x', 'y', 'z'])).toBe(false);
});
});

describe('stripHtml', () => {
it('returns empty string for null/undefined input', () => {
expect(stripHtml('')).toBe('');
expect(stripHtml(null as any)).toBe('');
expect(stripHtml(undefined as any)).toBe('');
});

it('returns original string when no tags present', () => {
expect(stripHtml('Hello World')).toBe('Hello World');
expect(stripHtml('123')).toBe('123');
});

it('removes HTML tags', () => {
expect(stripHtml('<p>Hello</p>')).toBe('Hello');
expect(stripHtml('<div><span>Text</span></div>')).toBe('Text');
});

it('removes stray angle brackets', () => {
expect(stripHtml('a < b > c')).toBe('a b c');
expect(stripHtml('< text >')).toBe(' text ');
expect(stripHtml('<div>hi></div>')).toBe('hi');
});

it('handles complex HTML with attributes', () => {
expect(stripHtml('<div class="test" id="123">Content</div>')).toBe('Content');
expect(stripHtml('<a href="http://test.com">Link</a>')).toBe('Link');
});

it('handles nested tags and preserves whitespace', () => {
expect(stripHtml('<div>Outer <span>Inner</span> Text</div>')).toBe('Outer Inner Text');
expect(stripHtml('<p>Line 1</p>\n<p>Line 2</p>')).toBe('Line 1\nLine 2');
});

it('handles malformed HTML', () => {
expect(stripHtml('<div>Unclosed')).toBe('Unclosed');
expect(stripHtml('Unopened</div>')).toBe('Unopened');
expect(stripHtml('<div>Mismatched</span>')).toBe('Mismatched');
});

it('handles special characters', () => {
expect(stripHtml('&lt;div&gt;')).toBe('&lt;div&gt;');
expect(stripHtml('<div>&copy; 2024</div>')).toBe('© 2024');
});

it('prevents script injection', () => {
expect(stripHtml('<script>alert("xss")</script>')).toBe('alert("xss")');
expect(stripHtml('<scr<script>ipt>')).toBe('ipt');
expect(stripHtml('<script')).toBe('');
expect(stripHtml('<<script>script>')).toBe('script');
});

it('prevents other dangerous tags', () => {
expect(stripHtml('<style>body{color:red}</style>')).toBe('body{color:red}');
expect(stripHtml('<iframe src="evil.html">')).toBe('');
expect(stripHtml('<object data="evil.swf">')).toBe('');
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,26 @@ export function areStringArraysEqual(a: string[], b: string[]): boolean {

return true;
}

/**
* Removes html tags from a string while preserving the text content. Stray angle brackets are also removed.
* @param content The string with possible HTML tags to process.
* @returns The string with all tags and angle brackets removed.
*/
export function stripHtml(content: string): string {
if (content == null) {
return '';
}

// Skip processing if no angle brackets
if (!/[<>]/.test(content)) {
return content;
}

// Use 'text/html' to avoid parsing errors with 'text/xml', as it is more lenient
const doc: Document = new DOMParser().parseFromString(content, 'text/html');
let result: string = doc.documentElement.textContent || '';

// Remove any remaining stray angle brackets
return result.replace(/[<>]/g, '');
}

0 comments on commit f7aada1

Please sign in to comment.