src/third_party/protobuf/java/src/main/java/com/google/protobuf/Internal.java - cobalt - Git at Google

 // Protocol Buffers - Google's data interchange format
 // Copyright 2008 Google Inc.  All rights reserved.
 // http://code.google.com/p/protobuf/
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
 //
 //     * Redistributions of source code must retain the above copyright
 // notice, this list of conditions and the following disclaimer.
 //     * Redistributions in binary form must reproduce the above
 // copyright notice, this list of conditions and the following disclaimer
 // in the documentation and/or other materials provided with the
 // distribution.
 //     * Neither the name of Google Inc. nor the names of its
 // contributors may be used to endorse or promote products derived from
 // this software without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 package com.google.protobuf;

 import java.io.UnsupportedEncodingException;

 /**
  * Support code shared by the Protocol Buffer runtime and the message classes
  * emitted by the protocol compiler. The members are {@code public} solely
  * because generated messages live outside the {@code protobuf} package;
  * application code should not depend on this class.
  *
  * @author kenton@google.com (Kenton Varda)
  */
 public class Internal {
   /**
    * Decodes the default value of a string field from the literal that protoc
    * placed in generated code.
    * <p>
    * protoc has no UTF-8 decoder; it only shuttles UTF-8 text around as opaque
    * bytes. Emitting a proper Java string literal would require Unicode
    * escapes, and therefore decoding, so protoc writes the raw UTF-8 bytes
    * into the generated source and lets the runtime decode them.
    * <p>
    * A byte-array initializer such as
    * <pre>
    *   new byte[] {0x12, 0x34, 0x56, 0x78}
    * </pre>
    * is compiled into code that allocates the array and stores each element,
    * which is much less efficient than data embedded directly in the bytecode.
    * String literals are embedded directly, so protoc instead writes the bytes
    * as a CEscape'd string literal in ISO-8859-1, whose characters coincide
    * with the first 256 Unicode code points.
    * <p>
    * The argument is therefore a string that stands for bytes that stand for
    * the real string; this method undoes both layers. Generated code calls it
    * automatically.
    *
    * @param bytes literal in which every char represents one UTF-8 byte
    * @return the text obtained by decoding those bytes as UTF-8
    * @throws IllegalStateException if the JVM lacks ISO-8859-1 or UTF-8
    */
   public static String stringDefaultValue(String bytes) {
     try {
       final byte[] utf8Bytes = bytes.getBytes("ISO-8859-1");
       return new String(utf8Bytes, "UTF-8");
     } catch (UnsupportedEncodingException e) {
       // Not expected to occur: every JVM is required to provide both of the
       // character sets named above.
       throw new IllegalStateException(
           "Java VM does not support a standard character set.", e);
     }
   }

   /**
    * Builds the default value of a bytes field from the literal that protoc
    * placed in generated code.
    * <p>
    * Compared with {@link #stringDefaultValue} only the second trick is
    * needed here: the raw bytes travel as an ISO-8859-1 string literal and
    * are turned back into bytes, with no UTF-8 decoding step.
    *
    * @param bytes literal in which every char represents one byte
    * @return a {@link ByteString} holding a copy of those bytes
    * @throws IllegalStateException if the JVM lacks ISO-8859-1
    */
   public static ByteString bytesDefaultValue(String bytes) {
     try {
       final byte[] rawBytes = bytes.getBytes("ISO-8859-1");
       return ByteString.copyFrom(rawBytes);
     } catch (UnsupportedEncodingException e) {
       // Not expected to occur: every JVM is required to provide
       // ISO-8859-1.
       throw new IllegalStateException(
           "Java VM does not support a standard character set.", e);
     }
   }

   /**
    * Reports whether the given bytes are well-formed UTF-8, i.e. whether they
    * can be decoded into a {@code String} and encoded again without any byte
    * changing. Called by generated code.
    * <p>
    * The approach is modelled on UTF_8.java in sun.nio.cs.
    *
    * @param byteString the bytes to examine
    * @return {@code true} if the bytes survive a round trip through a String
    */
   public static boolean isValidUtf8(ByteString byteString) {
     final int length = byteString.size();
     int pos = 0;
     // Each byte is masked to an unsigned int so the range comparisons below
     // can be written with the usual hexadecimal bounds.

     while (pos < length) {
       final int lead = byteString.byteAt(pos++) & 0xFF;
       if (lead < 0x80) {
         // One-byte (ASCII) sequence; nothing else to verify.
         continue;
       }

       // Only two-, three- and four-byte sequences remain.
       if (lead < 0xC2 || lead > 0xF4) {
         // Lead bytes below C2 or above F4 are never legal.
         return false;
       }

       if (pos >= length) {
         // Input ended in the middle of a sequence.
         return false;
       }
       final int second = byteString.byteAt(pos++) & 0xFF;
       if (!isTrailByte(second)) {
         return false;
       }
       if (lead <= 0xDF) {
         // Two-byte sequence: the generic trail-byte test is all it needs.
         continue;
       }

       // Only three- and four-byte sequences remain.
       if (pos >= length) {
         // Input ended in the middle of a sequence.
         return false;
       }
       final int third = byteString.byteAt(pos++) & 0xFF;
       if (!isTrailByte(third)) {
         return false;
       }

       if (lead <= 0xEF) {
         // Three-byte sequence, far more common than the four-byte kind.
         // Lead bytes E0 and ED restrict the range of the second byte.
         final boolean e0SecondTooLow = lead == 0xE0 && second < 0xA0;
         final boolean edSecondTooHigh = lead == 0xED && second > 0x9F;
         if (e0SecondTooLow || edSecondTooHigh) {
           return false;
         }
         continue;
       }

       // Four-byte sequence.
       if (pos >= length) {
         // Input ended in the middle of a sequence.
         return false;
       }
       final int fourth = byteString.byteAt(pos++) & 0xFF;
       if (!isTrailByte(fourth)) {
         return false;
       }
       // Lead bytes F0 and F4 restrict the range of the second byte.
       final boolean f0SecondTooLow = lead == 0xF0 && second < 0x90;
       final boolean f4SecondTooHigh = lead == 0xF4 && second > 0x8F;
       if (f0SecondTooLow || f4SecondTooHigh) {
         return false;
       }
     }
     return true;
   }

   /**
    * Generic trail-byte test used by {@link #isValidUtf8}.
    *
    * @param unsignedByte a byte value already masked into the range 0..255
    * @return {@code true} if the value lies in the range 0x80..0xBF
    */
   private static boolean isTrailByte(int unsignedByte) {
     return unsignedByte >= 0x80 && unsignedByte <= 0xBF;
   }

   /**
    * Common type for what a FieldSet holds for an enum field: an enum value
    * or a value descriptor. The lite library puts enum values straight into
    * FieldSets, while the full library stores EnumValueDescriptors so that
    * reflection is better supported.
    */
   public interface EnumLite {
     int getNumber();
   }

   /**
    * A lookup from integers to {@link EnumLite}s.
    * {@link Descriptors.EnumDescriptor} implements it by mapping numbers to
    * {@link Descriptors.EnumValueDescriptor}s. In addition, each generated
    * enum type exposes a static internalGetValueMap() method returning an
    * implementation that maps numbers to that type's enum values.
    */
   public interface EnumLiteMap<T extends EnumLite> {
     T findValueByNumber(int number);
   }
 }
	// Protocol Buffers - Google's data interchange format
	// Copyright 2008 Google Inc. All rights reserved.
	// http://code.google.com/p/protobuf/
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are
	// met:
	//
	// * Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	// * Redistributions in binary form must reproduce the above
	// copyright notice, this list of conditions and the following disclaimer
	// in the documentation and/or other materials provided with the
	// distribution.
	// * Neither the name of Google Inc. nor the names of its
	// contributors may be used to endorse or promote products derived from
	// this software without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	package com.google.protobuf;

	import java.io.UnsupportedEncodingException;

	/**
	* The classes contained within are used internally by the Protocol Buffer
	* library and generated message implementations. They are public only because
	* those generated messages do not reside in the {@code protobuf} package.
	* Others should not use this class directly.
	*
	* @author kenton@google.com (Kenton Varda)
	*/
	public class Internal {
	/**
	* Helper called by generated code to construct default values for string
	* fields.
	* <p>
	* The protocol compiler does not actually contain a UTF-8 decoder -- it
	* just pushes UTF-8-encoded text around without touching it. The one place
	* where this presents a problem is when generating Java string literals.
	* Unicode characters in the string literal would normally need to be encoded
	* using a Unicode escape sequence, which would require decoding them.
	* To get around this, protoc instead embeds the UTF-8 bytes into the
	* generated code and leaves it to the runtime library to decode them.
	* <p>
	* It gets worse, though. If protoc just generated a byte array, like:
	* new byte[] {0x12, 0x34, 0x56, 0x78}
	* Java actually generates code which allocates an array and then fills
	* in each value. This is much less efficient than just embedding the bytes
	* directly into the bytecode. To get around this, we need another
	* work-around. String literals are embedded directly, so protoc actually
	* generates a string literal corresponding to the bytes. The easiest way
	* to do this is to use the ISO-8859-1 character set, which corresponds to
	* the first 256 characters of the Unicode range. Protoc can then use
	* good old CEscape to generate the string.
	* <p>
	* So we have a string literal which represents a set of bytes which
	* represents another string. This function -- stringDefaultValue --
	* converts from the generated string to the string we actually want. The
	* generated code calls this automatically.
	*/
	public static String stringDefaultValue(String bytes) {
	try {
	return new String(bytes.getBytes("ISO-8859-1"), "UTF-8");
	} catch (UnsupportedEncodingException e) {
	// This should never happen since all JVMs are required to implement
	// both of the above character sets.
	throw new IllegalStateException(
	"Java VM does not support a standard character set.", e);
	}
	}

	/**
	* Helper called by generated code to construct default values for bytes
	* fields.
	* <p>
	* This is a lot like {@link #stringDefaultValue}, but for bytes fields.
	* In this case we only need the second of the two hacks -- allowing us to
	* embed raw bytes as a string literal with ISO-8859-1 encoding.
	*/
	public static ByteString bytesDefaultValue(String bytes) {
	try {
	return ByteString.copyFrom(bytes.getBytes("ISO-8859-1"));
	} catch (UnsupportedEncodingException e) {
	// This should never happen since all JVMs are required to implement
	// ISO-8859-1.
	throw new IllegalStateException(
	"Java VM does not support a standard character set.", e);
	}
	}

	/**
	* Helper called by generated code to determine if a byte array is a valid
	* UTF-8 encoded string such that the original bytes can be converted to
	* a String object and then back to a byte array round tripping the bytes
	* without loss.
	* <p>
	* This is inspired by UTF_8.java in sun.nio.cs.
	*
	* @param byteString the string to check
	* @return whether the byte array is round trippable
	*/
	public static boolean isValidUtf8(ByteString byteString) {
	int index = 0;
	int size = byteString.size();
	// To avoid the masking, we could change this to use bytes;
	// Then X > 0xC2 gets turned into X < -0xC2; X < 0x80
	// gets turned into X >= 0, etc.

	while (index < size) {
	int byte1 = byteString.byteAt(index++) & 0xFF;
	if (byte1 < 0x80) {
	// fast loop for single bytes
	continue;

	// we know from this point on that we have 2-4 byte forms
	} else if (byte1 < 0xC2 \|\| byte1 > 0xF4) {
	// catch illegal first bytes: < C2 or > F4
	return false;
	}
	if (index >= size) {
	// fail if we run out of bytes
	return false;
	}
	int byte2 = byteString.byteAt(index++) & 0xFF;
	if (byte2 < 0x80 \|\| byte2 > 0xBF) {
	// general trail-byte test
	return false;
	}
	if (byte1 <= 0xDF) {
	// two-byte form; general trail-byte test is sufficient
	continue;
	}

	// we know from this point on that we have 3 or 4 byte forms
	if (index >= size) {
	// fail if we run out of bytes
	return false;
	}
	int byte3 = byteString.byteAt(index++) & 0xFF;
	if (byte3 < 0x80 \|\| byte3 > 0xBF) {
	// general trail-byte test
	return false;
	}
	if (byte1 <= 0xEF) {
	// three-byte form. Vastly more frequent than four-byte forms
	// The following has an extra test, but not worth restructuring
	if (byte1 == 0xE0 && byte2 < 0xA0 \|\|
	byte1 == 0xED && byte2 > 0x9F) {
	// check special cases of byte2
	return false;
	}

	} else {
	// four-byte form

	if (index >= size) {
	// fail if we run out of bytes
	return false;
	}
	int byte4 = byteString.byteAt(index++) & 0xFF;
	if (byte4 < 0x80 \|\| byte4 > 0xBF) {
	// general trail-byte test
	return false;
	}
	// The following has an extra test, but not worth restructuring
	if (byte1 == 0xF0 && byte2 < 0x90 \|\|
	byte1 == 0xF4 && byte2 > 0x8F) {
	// check special cases of byte2
	return false;
	}
	}
	}
	return true;
	}

	/**
	* Interface for an enum value or value descriptor, to be used in FieldSet.
	* The lite library stores enum values directly in FieldSets but the full
	* library stores EnumValueDescriptors in order to better support reflection.
	*/
	public interface EnumLite {
	int getNumber();
	}

	/**
	* Interface for an object which maps integers to {@link EnumLite}s.
	* {@link Descriptors.EnumDescriptor} implements this interface by mapping
	* numbers to {@link Descriptors.EnumValueDescriptor}s. Additionally,
	* every generated enum type has a static method internalGetValueMap() which
	* returns an implementation of this type that maps numbers to enum values.
	*/
	public interface EnumLiteMap<T extends EnumLite> {
	T findValueByNumber(int number);
	}
	}