/*
 * Copyright (C) 2008-2009, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.eclipse.jgit.patch;

import static java.nio.charset.StandardCharsets.UTF_8;
import static org.eclipse.jgit.lib.Constants.encodeASCII;
import static org.eclipse.jgit.util.RawParseUtils.decode;
import static org.eclipse.jgit.util.RawParseUtils.decodeNoFallback;
import static org.eclipse.jgit.util.RawParseUtils.extractBinaryString;
import static org.eclipse.jgit.util.RawParseUtils.match;
import static org.eclipse.jgit.util.RawParseUtils.nextLF;
import static org.eclipse.jgit.util.RawParseUtils.parseBase10;

import java.io.IOException;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.diff.EditList;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.util.QuotedString;
import org.eclipse.jgit.util.RawParseUtils;
import org.eclipse.jgit.util.TemporaryBuffer;

Patch header describing an action for a single file path.
/** * Patch header describing an action for a single file path. */
public class FileHeader extends DiffEntry { private static final byte[] OLD_MODE = encodeASCII("old mode "); //$NON-NLS-1$ private static final byte[] NEW_MODE = encodeASCII("new mode "); //$NON-NLS-1$ static final byte[] DELETED_FILE_MODE = encodeASCII("deleted file mode "); //$NON-NLS-1$ static final byte[] NEW_FILE_MODE = encodeASCII("new file mode "); //$NON-NLS-1$ private static final byte[] COPY_FROM = encodeASCII("copy from "); //$NON-NLS-1$ private static final byte[] COPY_TO = encodeASCII("copy to "); //$NON-NLS-1$ private static final byte[] RENAME_OLD = encodeASCII("rename old "); //$NON-NLS-1$ private static final byte[] RENAME_NEW = encodeASCII("rename new "); //$NON-NLS-1$ private static final byte[] RENAME_FROM = encodeASCII("rename from "); //$NON-NLS-1$ private static final byte[] RENAME_TO = encodeASCII("rename to "); //$NON-NLS-1$ private static final byte[] SIMILARITY_INDEX = encodeASCII("similarity index "); //$NON-NLS-1$ private static final byte[] DISSIMILARITY_INDEX = encodeASCII("dissimilarity index "); //$NON-NLS-1$ static final byte[] INDEX = encodeASCII("index "); //$NON-NLS-1$ static final byte[] OLD_NAME = encodeASCII("--- "); //$NON-NLS-1$ static final byte[] NEW_NAME = encodeASCII("+++ "); //$NON-NLS-1$
Type of patch used by this file.
/** Type of patch used by this file. */
public static enum PatchType {
A traditional unified diff style patch of a text file.
/** A traditional unified diff style patch of a text file. */
UNIFIED,
An empty patch with a message "Binary files ... differ"
/** An empty patch with a message "Binary files ... differ" */
BINARY,
A Git binary patch, holding pre and post image deltas
/** A Git binary patch, holding pre and post image deltas */
GIT_BINARY; }
Buffer holding the patch data for this file.
/** Buffer holding the patch data for this file. */
final byte[] buf;
Offset within buf to the "diff ..." line.
/** Offset within {@link #buf} to the "diff ..." line. */
final int startOffset;
Position 1 past the end of this file within buf.
/** Position 1 past the end of this file within {@link #buf}. */
int endOffset;
Type of patch used to modify this file
/** Type of patch used to modify this file */
PatchType patchType;
The hunks of this file
/** The hunks of this file */
private List<HunkHeader> hunks;
If patchType is PatchType.GIT_BINARY, the new image
/** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the new image */
BinaryHunk forwardBinaryHunk;
If patchType is PatchType.GIT_BINARY, the old image
/** If {@link #patchType} is {@link PatchType#GIT_BINARY}, the old image */
BinaryHunk reverseBinaryHunk;
Constructs a new FileHeader
Params:
  • headerLines – buffer holding the diff header for this file
  • edits – the edits for this file
  • type – the type of patch used to modify this file
/** * Constructs a new FileHeader * * @param headerLines * buffer holding the diff header for this file * @param edits * the edits for this file * @param type * the type of patch used to modify this file */
public FileHeader(byte[] headerLines, EditList edits, PatchType type) { this(headerLines, 0); endOffset = headerLines.length; int ptr = parseGitFileName(Patch.DIFF_GIT.length, headerLines.length); parseGitHeaders(ptr, headerLines.length); this.patchType = type; addHunk(new HunkHeader(this, edits)); } FileHeader(byte[] b, int offset) { buf = b; startOffset = offset; changeType = ChangeType.MODIFY; // unless otherwise designated patchType = PatchType.UNIFIED; } int getParentCount() { return 1; }
Get the byte array holding this file's patch script.
Returns:the byte array holding this file's patch script.
/** * Get the byte array holding this file's patch script. * * @return the byte array holding this file's patch script. */
public byte[] getBuffer() { return buf; }
Get offset of the start of this file's script in getBuffer().
Returns:offset of the start of this file's script in getBuffer().
/** * Get offset of the start of this file's script in {@link #getBuffer()}. * * @return offset of the start of this file's script in * {@link #getBuffer()}. */
public int getStartOffset() { return startOffset; }
Get offset one past the end of the file script.
Returns:offset one past the end of the file script.
/** * Get offset one past the end of the file script. * * @return offset one past the end of the file script. */
public int getEndOffset() { return endOffset; }
Convert the patch script for this file into a string.

The default character encoding (StandardCharsets.UTF_8) is assumed for both the old and new files.

Returns:the patch script, as a Unicode string.
/** * Convert the patch script for this file into a string. * <p> * The default character encoding * ({@link java.nio.charset.StandardCharsets#UTF_8}) is assumed for both the * old and new files. * * @return the patch script, as a Unicode string. */
public String getScriptText() { return getScriptText(null, null); }
Convert the patch script for this file into a string.
Params:
  • oldCharset – hint character set to decode the old lines with.
  • newCharset – hint character set to decode the new lines with.
Returns:the patch script, as a Unicode string.
/** * Convert the patch script for this file into a string. * * @param oldCharset * hint character set to decode the old lines with. * @param newCharset * hint character set to decode the new lines with. * @return the patch script, as a Unicode string. */
public String getScriptText(Charset oldCharset, Charset newCharset) { return getScriptText(new Charset[] { oldCharset, newCharset }); } String getScriptText(Charset[] charsetGuess) { if (getHunks().isEmpty()) { // If we have no hunks then we can safely assume the entire // patch is a binary style patch, or a meta-data only style // patch. Either way the encoding of the headers should be // strictly 7-bit US-ASCII and the body is either 7-bit ASCII // (due to the base 85 encoding used for a BinaryHunk) or is // arbitrary noise we have chosen to ignore and not understand // (e.g. the message "Binary files ... differ"). // return extractBinaryString(buf, startOffset, endOffset); } if (charsetGuess != null && charsetGuess.length != getParentCount() + 1) throw new IllegalArgumentException(MessageFormat.format( JGitText.get().expectedCharacterEncodingGuesses, Integer.valueOf(getParentCount() + 1))); if (trySimpleConversion(charsetGuess)) { Charset cs = charsetGuess != null ? charsetGuess[0] : null; if (cs == null) { cs = UTF_8; } try { return decodeNoFallback(cs, buf, startOffset, endOffset); } catch (CharacterCodingException cee) { // Try the much slower, more-memory intensive version which // can handle a character set conversion patch. } } final StringBuilder r = new StringBuilder(endOffset - startOffset); // Always treat the headers as US-ASCII; Git file names are encoded // in a C style escape if any character has the high-bit set. // final int hdrEnd = getHunks().get(0).getStartOffset(); for (int ptr = startOffset; ptr < hdrEnd;) { final int eol = Math.min(hdrEnd, nextLF(buf, ptr)); r.append(extractBinaryString(buf, ptr, eol)); ptr = eol; } final String[] files = extractFileLines(charsetGuess); final int[] offsets = new int[files.length]; for (HunkHeader h : getHunks()) h.extractFileLines(r, files, offsets); return r.toString(); } private static boolean trySimpleConversion(Charset[] charsetGuess) { if (charsetGuess == null) return true; for (int i = 1; i < charsetGuess.length; i++) { if (charsetGuess[i] != charsetGuess[0]) return false; } return true; } private String[] extractFileLines(Charset[] csGuess) { final TemporaryBuffer[] tmp = new TemporaryBuffer[getParentCount() + 1]; try { for (int i = 0; i < tmp.length; i++) tmp[i] = new TemporaryBuffer.Heap(Integer.MAX_VALUE); for (HunkHeader h : getHunks()) h.extractFileLines(tmp); final String[] r = new String[tmp.length]; for (int i = 0; i < tmp.length; i++) { Charset cs = csGuess != null ? csGuess[i] : null; if (cs == null) { cs = UTF_8; } r[i] = RawParseUtils.decode(cs, tmp[i].toByteArray()); } return r; } catch (IOException ioe) { throw new RuntimeException(JGitText.get().cannotConvertScriptToText, ioe); } }
Get style of patch used to modify this file.
Returns:style of patch used to modify this file.
/** * Get style of patch used to modify this file. * * @return style of patch used to modify this file. */
public PatchType getPatchType() { return patchType; }
Whether this patch modifies metadata about a file
Returns:true if this patch modifies metadata about a file .
/** * Whether this patch modifies metadata about a file * * @return {@code true} if this patch modifies metadata about a file . */
public boolean hasMetaDataChanges() { return changeType != ChangeType.MODIFY || newMode != oldMode; }
Get hunks altering this file; in order of appearance in patch
Returns:hunks altering this file; in order of appearance in patch.
/** * Get hunks altering this file; in order of appearance in patch * * @return hunks altering this file; in order of appearance in patch. */
public List<? extends HunkHeader> getHunks() { if (hunks == null) return Collections.emptyList(); return hunks; } void addHunk(HunkHeader h) { if (h.getFileHeader() != this) throw new IllegalArgumentException(JGitText.get().hunkBelongsToAnotherFile); if (hunks == null) hunks = new ArrayList<>(); hunks.add(h); } HunkHeader newHunkHeader(int offset) { return new HunkHeader(this, offset); }
Get the new-image delta/literal if this is a PatchType.GIT_BINARY.
Returns:the new-image delta/literal if this is a PatchType.GIT_BINARY.
/** * Get the new-image delta/literal if this is a * {@link PatchType#GIT_BINARY}. * * @return the new-image delta/literal if this is a * {@link PatchType#GIT_BINARY}. */
public BinaryHunk getForwardBinaryHunk() { return forwardBinaryHunk; }
Get the old-image delta/literal if this is a PatchType.GIT_BINARY.
Returns:the old-image delta/literal if this is a PatchType.GIT_BINARY.
/** * Get the old-image delta/literal if this is a * {@link PatchType#GIT_BINARY}. * * @return the old-image delta/literal if this is a * {@link PatchType#GIT_BINARY}. */
public BinaryHunk getReverseBinaryHunk() { return reverseBinaryHunk; }
Convert to a list describing the content edits performed on this file.
Returns:a list describing the content edits performed on this file.
/** * Convert to a list describing the content edits performed on this file. * * @return a list describing the content edits performed on this file. */
public EditList toEditList() { final EditList r = new EditList(); for (HunkHeader hunk : hunks) r.addAll(hunk.toEditList()); return r; }
Parse a "diff --git" or "diff --cc" line.
Params:
  • ptr – first character after the "diff --git " or "diff --cc " part.
  • end – one past the last position to parse.
Returns:first character after the LF at the end of the line; -1 on error.
/** * Parse a "diff --git" or "diff --cc" line. * * @param ptr * first character after the "diff --git " or "diff --cc " part. * @param end * one past the last position to parse. * @return first character after the LF at the end of the line; -1 on error. */
int parseGitFileName(int ptr, int end) { final int eol = nextLF(buf, ptr); final int bol = ptr; if (eol >= end) { return -1; } // buffer[ptr..eol] looks like "a/foo b/foo\n". After the first // A regex to match this is "^[^/]+/(.*?) [^/+]+/\1\n$". There // is only one way to split the line such that text to the left // of the space matches the text to the right, excluding the part // before the first slash. // final int aStart = nextLF(buf, ptr, '/'); if (aStart >= eol) return eol; while (ptr < eol) { final int sp = nextLF(buf, ptr, ' '); if (sp >= eol) { // We can't split the header, it isn't valid. // This may be OK if this is a rename patch. // return eol; } final int bStart = nextLF(buf, sp, '/'); if (bStart >= eol) return eol; // If buffer[aStart..sp - 1] = buffer[bStart..eol - 1] // we have a valid split. // if (eq(aStart, sp - 1, bStart, eol - 1)) { if (buf[bol] == '"') { // We're a double quoted name. The region better end // in a double quote too, and we need to decode the // characters before reading the name. // if (buf[sp - 2] != '"') { return eol; } oldPath = QuotedString.GIT_PATH.dequote(buf, bol, sp - 1); oldPath = p1(oldPath); } else { oldPath = decode(UTF_8, buf, aStart, sp - 1); } newPath = oldPath; return eol; } // This split wasn't correct. Move past the space and try // another split as the space must be part of the file name. // ptr = sp; } return eol; } int parseGitHeaders(int ptr, int end) { while (ptr < end) { final int eol = nextLF(buf, ptr); if (isHunkHdr(buf, ptr, eol) >= 1) { // First hunk header; break out and parse them later. break; } else if (match(buf, ptr, OLD_NAME) >= 0) { parseOldName(ptr, eol); } else if (match(buf, ptr, NEW_NAME) >= 0) { parseNewName(ptr, eol); } else if (match(buf, ptr, OLD_MODE) >= 0) { oldMode = parseFileMode(ptr + OLD_MODE.length, eol); } else if (match(buf, ptr, NEW_MODE) >= 0) { newMode = parseFileMode(ptr + NEW_MODE.length, eol); } else if (match(buf, ptr, DELETED_FILE_MODE) >= 0) { oldMode = parseFileMode(ptr + DELETED_FILE_MODE.length, eol); newMode = FileMode.MISSING; changeType = ChangeType.DELETE; } else if (match(buf, ptr, NEW_FILE_MODE) >= 0) { parseNewFileMode(ptr, eol); } else if (match(buf, ptr, COPY_FROM) >= 0) { oldPath = parseName(oldPath, ptr + COPY_FROM.length, eol); changeType = ChangeType.COPY; } else if (match(buf, ptr, COPY_TO) >= 0) { newPath = parseName(newPath, ptr + COPY_TO.length, eol); changeType = ChangeType.COPY; } else if (match(buf, ptr, RENAME_OLD) >= 0) { oldPath = parseName(oldPath, ptr + RENAME_OLD.length, eol); changeType = ChangeType.RENAME; } else if (match(buf, ptr, RENAME_NEW) >= 0) { newPath = parseName(newPath, ptr + RENAME_NEW.length, eol); changeType = ChangeType.RENAME; } else if (match(buf, ptr, RENAME_FROM) >= 0) { oldPath = parseName(oldPath, ptr + RENAME_FROM.length, eol); changeType = ChangeType.RENAME; } else if (match(buf, ptr, RENAME_TO) >= 0) { newPath = parseName(newPath, ptr + RENAME_TO.length, eol); changeType = ChangeType.RENAME; } else if (match(buf, ptr, SIMILARITY_INDEX) >= 0) { score = parseBase10(buf, ptr + SIMILARITY_INDEX.length, null); } else if (match(buf, ptr, DISSIMILARITY_INDEX) >= 0) { score = parseBase10(buf, ptr + DISSIMILARITY_INDEX.length, null); } else if (match(buf, ptr, INDEX) >= 0) { parseIndexLine(ptr + INDEX.length, eol); } else { // Probably an empty patch (stat dirty). break; } ptr = eol; } return ptr; } void parseOldName(int ptr, int eol) { oldPath = p1(parseName(oldPath, ptr + OLD_NAME.length, eol)); if (oldPath == DEV_NULL) changeType = ChangeType.ADD; } void parseNewName(int ptr, int eol) { newPath = p1(parseName(newPath, ptr + NEW_NAME.length, eol)); if (newPath == DEV_NULL) changeType = ChangeType.DELETE; } void parseNewFileMode(int ptr, int eol) { oldMode = FileMode.MISSING; newMode = parseFileMode(ptr + NEW_FILE_MODE.length, eol); changeType = ChangeType.ADD; } int parseTraditionalHeaders(int ptr, int end) { while (ptr < end) { final int eol = nextLF(buf, ptr); if (isHunkHdr(buf, ptr, eol) >= 1) { // First hunk header; break out and parse them later. break; } else if (match(buf, ptr, OLD_NAME) >= 0) { parseOldName(ptr, eol); } else if (match(buf, ptr, NEW_NAME) >= 0) { parseNewName(ptr, eol); } else { // Possibly an empty patch. break; } ptr = eol; } return ptr; } private String parseName(String expect, int ptr, int end) { if (ptr == end) return expect; String r; if (buf[ptr] == '"') { // New style GNU diff format // r = QuotedString.GIT_PATH.dequote(buf, ptr, end - 1); } else { // Older style GNU diff format, an optional tab ends the name. // int tab = end; while (ptr < tab && buf[tab - 1] != '\t') tab--; if (ptr == tab) tab = end; r = decode(UTF_8, buf, ptr, tab - 1); } if (r.equals(DEV_NULL)) r = DEV_NULL; return r; } private static String p1(final String r) { final int s = r.indexOf('/'); return s > 0 ? r.substring(s + 1) : r; } FileMode parseFileMode(int ptr, int end) { int tmp = 0; while (ptr < end - 1) { tmp <<= 3; tmp += buf[ptr++] - '0'; } return FileMode.fromBits(tmp); } void parseIndexLine(int ptr, int end) { // "index $asha1..$bsha1[ $mode]" where $asha1 and $bsha1 // can be unique abbreviations // final int dot2 = nextLF(buf, ptr, '.'); final int mode = nextLF(buf, dot2, ' '); oldId = AbbreviatedObjectId.fromString(buf, ptr, dot2 - 1); newId = AbbreviatedObjectId.fromString(buf, dot2 + 1, mode - 1); if (mode < end) newMode = oldMode = parseFileMode(mode, end); } private boolean eq(int aPtr, int aEnd, int bPtr, int bEnd) { if (aEnd - aPtr != bEnd - bPtr) { return false; } while (aPtr < aEnd) { if (buf[aPtr++] != buf[bPtr++]) return false; } return true; }
Determine if this is a patch hunk header.
Params:
  • buf – the buffer to scan
  • start – first position in the buffer to evaluate
  • end – last position to consider; usually the end of the buffer ( buf.length) or the first position on the next line. This is only used to avoid very long runs of '@' from killing the scan loop.
Returns:the number of "ancestor revisions" in the hunk header. A traditional two-way diff ("@@ -...") returns 1; a combined diff for a 3 way-merge returns 3. If this is not a hunk header, 0 is returned instead.
/** * Determine if this is a patch hunk header. * * @param buf * the buffer to scan * @param start * first position in the buffer to evaluate * @param end * last position to consider; usually the end of the buffer ( * <code>buf.length</code>) or the first position on the next * line. This is only used to avoid very long runs of '@' from * killing the scan loop. * @return the number of "ancestor revisions" in the hunk header. A * traditional two-way diff ("@@ -...") returns 1; a combined diff * for a 3 way-merge returns 3. If this is not a hunk header, 0 is * returned instead. */
static int isHunkHdr(byte[] buf, int start, int end) { int ptr = start; while (ptr < end && buf[ptr] == '@') ptr++; if (ptr - start < 2) return 0; if (ptr == end || buf[ptr++] != ' ') return 0; if (ptr == end || buf[ptr++] != '-') return 0; return (ptr - 3) - start; } }