package cc.mallet.pipe;

import cc.mallet.pipe.iterator.FileIterator;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import ch.ethz.bsse.quasirecomb.utils.Utils;
import java.io.IOException;
import java.io.StringReader;
import javax.swing.text.html.HTMLEditorKit;

/* loaded from: input_file:main/QuasiRecomb-1.0.jar:cc/mallet/pipe/CharSequenceRemoveHTML.class */
/**
 * A {@link Pipe} that replaces an instance's character data with the text
 * content reported by the Swing HTML parser, i.e. the input with its HTML
 * markup removed.
 *
 * <p>Each text run delivered by the parser is followed by a newline in the
 * resulting string.
 */
public class CharSequenceRemoveHTML extends Pipe {

    /**
     * Widens the visibility of {@link HTMLEditorKit#getParser()} so the
     * enclosing pipe can obtain a parser instance.
     */
    private static class ParserGetter extends HTMLEditorKit {
        private ParserGetter() {
        }

        @Override
        public HTMLEditorKit.Parser getParser() {
            return super.getParser();
        }
    }

    /**
     * Parser callback that accumulates every text run it receives and
     * ignores all other parser events.
     */
    private static class TagStripper extends HTMLEditorKit.ParserCallback {
        // Seeded with Utils.SAVEPATH to keep the original initial value.
        // NOTE(review): this looks like a decompiler substitution for an
        // empty-string literal -- confirm the constant's value.
        private final StringBuilder text = new StringBuilder(Utils.SAVEPATH);

        public TagStripper() {
        }

        /**
         * Appends one run of text followed by a newline.
         *
         * @param cArr the characters of the text run
         * @param i    position argument supplied by the parser; unused here
         */
        @Override
        public void handleText(char[] cArr, int i) {
            // A StringBuilder keeps accumulation linear in the document size;
            // repeated String concatenation copied the whole buffer per char.
            this.text.append(cArr).append('\n');
        }

        /**
         * @return all text collected so far, one line per text run
         */
        public String getText() {
            return this.text.toString();
        }
    }

    /**
     * Strips HTML markup from the instance's data.
     *
     * <p>The data is cast to {@link CharSequence}. Opening and closing
     * {@code NOFRAMES} tags are replaced with {@code Utils.SAVEPATH} before
     * parsing (presumably an empty string, i.e. the tags are deleted --
     * confirm the constant's value). The parser's text output then replaces
     * the instance data.
     *
     * <p>An {@link IOException} raised while parsing is reported on
     * {@code System.err} and otherwise ignored; whatever text was collected up
     * to that point is still stored in the instance.
     *
     * @param instance the instance whose data is a {@link CharSequence} of HTML
     * @return the same instance, with its data replaced by the extracted text
     */
    @Override // cc.mallet.pipe.Pipe
    public Instance pipe(Instance instance) {
        String html = ((CharSequence) instance.getData()).toString();
        // Two sequential passes, in this order, exactly as the original did.
        String cleaned = html
                .replaceAll("\\<NOFRAMES\\>", Utils.SAVEPATH)
                .replaceAll("\\<\\/NOFRAMES\\>", Utils.SAVEPATH);

        HTMLEditorKit.Parser parser = new ParserGetter().getParser();
        TagStripper tagStripper = new TagStripper();
        try {
            // Third argument: ignore charset directives found in the document.
            parser.parse(new StringReader(cleaned), tagStripper, true);
        } catch (IOException e) {
            // Best effort: report the failure and keep the text gathered so far.
            System.err.println(e);
        }
        instance.setData(tagStripper.getText());
        return instance;
    }

    /**
     * Command-line driver: runs the files found via the first argument through
     * {@link Input2CharSequence} and this pipe, then prints each instance's
     * resulting data to {@code System.err}.
     *
     * @param strArr command-line arguments; the first one is passed to
     *               {@link FileIterator} as the starting location
     */
    public static void main(String[] strArr) {
        if (strArr == null || strArr.length == 0) {
            // Previously this failed with ArrayIndexOutOfBoundsException.
            System.err.println("Usage: CharSequenceRemoveHTML <directory>");
            return;
        }
        String startPath = strArr[0];
        Pipe[] stages = {new Input2CharSequence(), new CharSequenceRemoveHTML()};
        InstanceList instanceList = new InstanceList(new SerialPipes(stages));
        instanceList.addThruPipe(new FileIterator(startPath, FileIterator.STARTING_DIRECTORIES));
        for (int i = 0; i < instanceList.size(); i++) {
            System.err.println(instanceList.get(i).getData());
        }
    }
}
