Code Monkey home page Code Monkey logo

Comments (3)

SergeiAlonichau avatar SergeiAlonichau commented on August 27, 2024

There is no official C# wrapper available, but we have designed the APIs so that they are easy to use from C#. Please look at this example:

`

namespace MyNamespace
{
    using System;
    using System.Collections.Generic;
    using System.Runtime.InteropServices;
    using System.Text;

    /// <summary>
    /// Managed convenience wrappers around the native Bling Fire tokenizer functions
    /// exported by <c>blingfiretokdll.dll</c>.
    /// </summary>
    /// <remarks>
    /// Every method encodes its input as UTF-8, hands it to the native library together with an
    /// output buffer of <c>2 * inputLength + 1</c> bytes, and splits the decoded output into tokens.
    /// Arguments are validated immediately; the native call itself is deferred until the returned
    /// sequence is enumerated. Empty input yields an empty sequence without touching the native library.
    /// </remarks>
    public static class BlingFireUtils
    {
        // Separators used to split the native output; never mutated, hence readonly.
        private static readonly char[] _justNewLineChar = new char[] { '\n' };
        private static readonly char[] _justSpaceChar = new char[] { ' ' };

        /// <summary>
        /// Breaks a paragraph into sentences using the native <c>TextToSentences</c> function.
        /// </summary>
        /// <param name="paragraph">Text to split; must not be null.</param>
        /// <returns>The non-empty sentences, in order; empty if the native call produced no usable output.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="paragraph"/> is null.</exception>
        public static IEnumerable<string> GetSentences(string paragraph)
        {
            // Validate eagerly: an iterator body would postpone this until the first MoveNext().
            if (paragraph == null)
            {
                throw new ArgumentNullException(nameof(paragraph));
            }

            return EnumerateSentences(Encoding.UTF8.GetBytes(paragraph));
        }

        /// <summary>
        /// Breaks a paragraph into sentences and reports the start/end offsets the native
        /// <c>TextToSentencesWithOffsets</c> function returned for each one.
        /// </summary>
        /// <param name="paragraph">Text to split; must not be null.</param>
        /// <returns>Tuples of (sentence, start offset, end offset).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="paragraph"/> is null.</exception>
        public static IEnumerable<Tuple<string, int, int>> GetSentencesWithOffsets(string paragraph)
        {
            if (paragraph == null)
            {
                throw new ArgumentNullException(nameof(paragraph));
            }

            return GetSentencesWithOffsets(Encoding.UTF8.GetBytes(paragraph));
        }

        /// <summary>
        /// Breaks UTF-8 encoded text into sentences and reports the start/end offsets the native
        /// <c>TextToSentencesWithOffsets</c> function returned for each one.
        /// </summary>
        /// <param name="paraBytes">UTF-8 bytes of the text to split; must not be null.</param>
        /// <returns>Tuples of (sentence, start offset, end offset).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="paraBytes"/> is null.</exception>
        /// <remarks>
        /// NOTE(review): the offsets are passed through untouched from the native library; they are
        /// presumably positions in the UTF-8 input rather than UTF-16 char indices - confirm against
        /// the native API documentation.
        /// </remarks>
        public static IEnumerable<Tuple<string, int, int>> GetSentencesWithOffsets(byte[] paraBytes)
        {
            if (paraBytes == null)
            {
                throw new ArgumentNullException(nameof(paraBytes));
            }

            return EnumerateSentencesWithOffsets(paraBytes);
        }

        /// <summary>
        /// Breaks a sentence into words using the native <c>TextToWords</c> function.
        /// </summary>
        /// <param name="sentence">Text to tokenize; must not be null.</param>
        /// <returns>The non-empty words, in order; empty if the native call produced no usable output.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="sentence"/> is null.</exception>
        public static IEnumerable<string> GetWords(string sentence)
        {
            if (sentence == null)
            {
                throw new ArgumentNullException(nameof(sentence));
            }

            return EnumerateWords(Encoding.UTF8.GetBytes(sentence));
        }

        /// <summary>
        /// Breaks a sentence into words and reports the start/end offsets the native
        /// <c>TextToWordsWithOffsets</c> function returned for each one.
        /// </summary>
        /// <param name="sentence">Text to tokenize; must not be null.</param>
        /// <returns>Tuples of (word, start offset, end offset).</returns>
        /// <exception cref="ArgumentNullException"><paramref name="sentence"/> is null.</exception>
        /// <remarks>
        /// NOTE(review): the offsets are passed through untouched from the native library; they are
        /// presumably positions in the UTF-8 encoding of <paramref name="sentence"/> - confirm.
        /// </remarks>
        public static IEnumerable<Tuple<string, int, int>> GetWordsWithOffsets(string sentence)
        {
            if (sentence == null)
            {
                throw new ArgumentNullException(nameof(sentence));
            }

            return EnumerateWordsWithOffsets(Encoding.UTF8.GetBytes(sentence));
        }

        /// <summary>
        /// Copies <paramref name="length"/> elements of <paramref name="data"/>, starting at
        /// <paramref name="index"/>, into a new array.
        /// </summary>
        /// <typeparam name="T">Element type.</typeparam>
        /// <param name="data">Source array; must not be null.</param>
        /// <param name="index">Index of the first element to copy.</param>
        /// <param name="length">Number of elements to copy.</param>
        /// <returns>A new array holding the copied elements.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
        public static T[] SubArray<T>(this T[] data, int index, int length)
        {
            if (data == null)
            {
                throw new ArgumentNullException(nameof(data));
            }

            T[] result = new T[length];
            Array.Copy(data, index, result, 0, length);
            return result;
        }

        // Deferred part of GetSentences: performs the native call when enumerated.
        private static IEnumerable<string> EnumerateSentences(byte[] paraBytes)
        {
            if (paraBytes.Length == 0)
            {
                // The length check in SplitOutput could never pass for a 1-byte buffer, so skip the native call.
                yield break;
            }

            int maxLength = GetOutputCapacity(paraBytes.Length);
            byte[] outputBytes = new byte[maxLength];

            // native call returns '\n' delimited sentences, and adds 0 byte at the end
            int actualLength = TextToSentences(paraBytes, paraBytes.Length, outputBytes, maxLength);
            foreach (string sentence in SplitOutput(outputBytes, actualLength, _justNewLineChar))
            {
                yield return sentence;
            }
        }

        // Deferred part of GetSentencesWithOffsets: performs the native call when enumerated.
        private static IEnumerable<Tuple<string, int, int>> EnumerateSentencesWithOffsets(byte[] paraBytes)
        {
            if (paraBytes.Length == 0)
            {
                yield break;
            }

            int maxLength = GetOutputCapacity(paraBytes.Length);
            byte[] outputBytes = new byte[maxLength];
            int[] startOffsets = new int[maxLength];
            int[] endOffsets = new int[maxLength];

            // native call returns '\n' delimited sentences, and adds 0 byte at the end
            int actualLength = TextToSentencesWithOffsets(paraBytes, paraBytes.Length, outputBytes, startOffsets, endOffsets, maxLength);
            string[] sentences = SplitOutput(outputBytes, actualLength, _justNewLineChar);
            for (int i = 0; i < sentences.Length; ++i)
            {
                yield return new Tuple<string, int, int>(sentences[i], startOffsets[i], endOffsets[i]);
            }
        }

        // Deferred part of GetWords: performs the native call when enumerated.
        private static IEnumerable<string> EnumerateWords(byte[] sentenceBytes)
        {
            if (sentenceBytes.Length == 0)
            {
                yield break;
            }

            int maxLength = GetOutputCapacity(sentenceBytes.Length);
            byte[] outputBytes = new byte[maxLength];

            // the native output is split on single spaces below to obtain the words
            int actualLength = TextToWords(sentenceBytes, sentenceBytes.Length, outputBytes, maxLength);
            foreach (string word in SplitOutput(outputBytes, actualLength, _justSpaceChar))
            {
                yield return word;
            }
        }

        // Deferred part of GetWordsWithOffsets: performs the native call when enumerated.
        private static IEnumerable<Tuple<string, int, int>> EnumerateWordsWithOffsets(byte[] sentenceBytes)
        {
            if (sentenceBytes.Length == 0)
            {
                yield break;
            }

            int maxLength = GetOutputCapacity(sentenceBytes.Length);
            byte[] outputBytes = new byte[maxLength];
            int[] startOffsets = new int[maxLength];
            int[] endOffsets = new int[maxLength];

            // the native output is split on single spaces below to obtain the words
            int actualLength = TextToWordsWithOffsets(sentenceBytes, sentenceBytes.Length, outputBytes, startOffsets, endOffsets, maxLength);
            string[] words = SplitOutput(outputBytes, actualLength, _justSpaceChar);
            for (int i = 0; i < words.Length; ++i)
            {
                yield return new Tuple<string, int, int>(words[i], startOffsets[i], endOffsets[i]);
            }
        }

        // Size of the output buffer handed to the native library for an input of the given byte length.
        private static int GetOutputCapacity(int inputLength)
        {
            return (2 * inputLength) + 1;
        }

        // Decodes the native output and splits it on the given separators, dropping empty entries.
        // Returns an empty array when the reported length is not usable.
        private static string[] SplitOutput(byte[] outputBytes, int actualLength, char[] separators)
        {
            // actualLength includes the trailing 0 byte, so <= 1 means no text; a value larger than
            // the buffer means the output did not fit.
            if (actualLength <= 1 || actualLength > outputBytes.Length)
            {
                return new string[0];
            }

            // Decode straight from the buffer (excluding the trailing 0 byte) instead of copying it first.
            string text = Encoding.UTF8.GetString(outputBytes, 0, actualLength - 1);
            return text.Split(separators, StringSplitOptions.RemoveEmptyEntries);
        }

        //
        // expose Bling Fire interfaces
        //

        [DllImport("blingfiretokdll.dll")]
        private static extern Int32 TextToSentences([MarshalAs(UnmanagedType.LPArray)] byte[] InUtf8Str, Int32 InUtf8StrLen, byte[] OutBuff, Int32 MaxBuffSize);

        [DllImport("blingfiretokdll.dll")]
        private static extern Int32 TextToWords([MarshalAs(UnmanagedType.LPArray)] byte[] InUtf8Str, Int32 InUtf8StrLen, byte[] OutBuff, Int32 MaxBuffSize);

        [DllImport("blingfiretokdll.dll")]
        private static extern Int32 TextToSentencesWithOffsets([MarshalAs(UnmanagedType.LPArray)] byte[] InUtf8Str, Int32 InUtf8StrLen, byte[] OutBuff, int[] StartOffsets, int[] EndOffsets, Int32 MaxBuffSize);

        [DllImport("blingfiretokdll.dll")]
        private static extern Int32 TextToWordsWithOffsets([MarshalAs(UnmanagedType.LPArray)] byte[] InUtf8Str, Int32 InUtf8StrLen, byte[] OutBuff, int[] StartOffsets, int[] EndOffsets, Int32 MaxBuffSize);
    }
}

`

from blingfire.

SergeiAlonichau avatar SergeiAlonichau commented on August 27, 2024

OK, we have started working on a NuGet package for C#; see the nuget directory. Please feel free to contribute or make suggestions.

from blingfire.

SergeiAlonichau avatar SergeiAlonichau commented on August 27, 2024

We have added BlingFireNuget 0.1.4 to nuget.org and the GitHub package store, along with a small test Program.cs file that illustrates how to use the package, so I am closing this issue.

from blingfire.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web. A new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games, to make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.