<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.3 20210610//EN" "JATS-journalpublishing1-3.dtd">
<article article-type="research-article" dtd-version="1.3" xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xml:lang="ru"><front><journal-meta><journal-id journal-id-type="publisher-id">vestnikmephi</journal-id><journal-title-group><journal-title xml:lang="ru">Вестник НИЯУ МИФИ</journal-title><trans-title-group xml:lang="en"><trans-title>Vestnik natsional'nogo issledovatel'skogo yadernogo universiteta "MIFI"</trans-title></trans-title-group></journal-title-group><issn pub-type="ppub">2304-487X</issn><publisher><publisher-name>National Research Nuclear University "MEPhI"</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="doi">10.1134/S2304487X19060130</article-id><article-id custom-type="elpub" pub-id-type="custom">vestnikmephi-61</article-id><article-categories><subj-group subj-group-type="heading"><subject>Research Article</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="ru"><subject>ПРИКЛАДНАЯ МАТЕМАТИКА И ИНФОРМАТИКА</subject></subj-group><subj-group subj-group-type="section-heading" xml:lang="en"><subject>APPLIED MATHEMATICS AND COMPUTER SCIENCE</subject></subj-group></article-categories><title-group><article-title>Модель нейронной сети для включения синтаксической структуры предложения в задачу классификации пола автора русского текста</article-title><trans-title-group xml:lang="en"><trans-title>Neural Network Model for Classification of Text’s Author Gender with Including Sentence Dependency Structure</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Сбоев</surname><given-names>А. Г.</given-names></name><name name-style="western" xml:lang="en"><surname>Sboev</surname><given-names>A. 
G.</given-names></name></name-alternatives><bio xml:lang="ru"><p>123098</p><p>115409</p><p>Москва</p></bio><bio xml:lang="en"><p>123098</p><p>115409</p><p>Moscow</p></bio><email xlink:type="simple">sag111@mail.ru</email><xref ref-type="aff" rid="aff-1"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Селиванов</surname><given-names>А. А.</given-names></name><name name-style="western" xml:lang="en"><surname>Selivanov</surname><given-names>A. A.</given-names></name></name-alternatives><bio xml:lang="ru"><p>123098</p><p>Москва</p></bio><bio xml:lang="en"><p>123098</p><p>Moscow</p></bio><xref ref-type="aff" rid="aff-2"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Рыбка</surname><given-names>Р. Б.</given-names></name><name name-style="western" xml:lang="en"><surname>Rybka</surname><given-names>R. B.</given-names></name></name-alternatives><bio xml:lang="ru"><p>123098</p><p>Москва</p></bio><bio xml:lang="en"><p>123098</p><p>Moscow</p></bio><xref ref-type="aff" rid="aff-2"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Молошников</surname><given-names>И. А.</given-names></name><name name-style="western" xml:lang="en"><surname>Moloshnikov</surname><given-names>I. A.</given-names></name></name-alternatives><bio xml:lang="ru"><p>123098</p><p>Москва</p></bio><bio xml:lang="en"><p>123098</p><p>Moscow</p></bio><xref ref-type="aff" rid="aff-2"/></contrib><contrib contrib-type="author" corresp="yes"><name-alternatives><name name-style="eastern" xml:lang="ru"><surname>Богачев</surname><given-names>Д. С.</given-names></name><name name-style="western" xml:lang="en"><surname>Bogachev</surname><given-names>D. 
S.</given-names></name></name-alternatives><bio xml:lang="ru"><p>123098</p><p>141701</p><p>Москва</p></bio><bio xml:lang="en"><p>123098</p><p>141701</p><p>Moscow</p></bio><xref ref-type="aff" rid="aff-3"/></contrib></contrib-group><aff-alternatives id="aff-1"><aff xml:lang="ru">Национальный исследовательский центр “Курчатовский институт”; Национальный исследовательский ядерный университет “МИФИ”<country>Россия</country></aff><aff xml:lang="en">National Research Center “Kurchatov Institute”; National Research Nuclear University “MEPhI” (Moscow Engineering Physics Institute)<country>Russian Federation</country></aff></aff-alternatives><aff-alternatives id="aff-2"><aff xml:lang="ru">Национальный исследовательский центр “Курчатовский институт”<country>Россия</country></aff><aff xml:lang="en">National Research Center “Kurchatov Institute”<country>Russian Federation</country></aff></aff-alternatives><aff-alternatives id="aff-3"><aff xml:lang="ru">Национальный исследовательский центр “Курчатовский институт”; Московский физико-технический институт (Национальный исследовательский университет)<country>Россия</country></aff><aff xml:lang="en">National Research Center “Kurchatov Institute”; The Moscow Institute of Physics and Technology (MIPT)<country>Russian Federation</country></aff></aff-alternatives><pub-date pub-type="collection"><year>2019</year></pub-date><pub-date pub-type="epub"><day>12</day><month>02</month><year>2023</year></pub-date><volume>8</volume><issue>6</issue><fpage>569</fpage><lpage>576</lpage><permissions><copyright-statement>Copyright © Сбоев А.Г., Селиванов А.А., Рыбка Р.Б., Молошников И.А., Богачев Д.С., 2023</copyright-statement><copyright-year>2023</copyright-year><copyright-holder xml:lang="ru">Сбоев А.Г., Селиванов А.А., Рыбка Р.Б., Молошников И.А., Богачев Д.С.</copyright-holder><copyright-holder xml:lang="en">Sboev A.G., Selivanov A.A., Rybka R.B., Moloshnikov I.A., Bogachev D.S.</copyright-holder><license 
license-type="creative-commons-attribution" xlink:href="https://creativecommons.org/licenses/by/4.0/" xlink:type="simple"><license-p>This work is licensed under a Creative Commons Attribution 4.0 License.</license-p></license></permissions><self-uri xlink:href="https://vestnikmephi.elpub.ru/jour/article/view/61">https://vestnikmephi.elpub.ru/jour/article/view/61</self-uri><abstract/><trans-abstract xml:lang="en"><p>The research proposes neural network methods for including a textual dependency tree structure in classification tasks on Russian texts. The author profiling task of gender identification was chosen to test the models, and two corpora were used in the experiments: one based on crowdsourcing and one on in-person polling. The first approach is based on long short-term memory (LSTM) layers and a developed graph embedding algorithm. The second one is based on a graph convolutional network and LSTM. Two syntactic parsers were used to obtain dependency trees from the texts. Input data were represented in different forms: morphological binary vectors, FastText vectors, and their combination. The results of the developed models were compared to the state of the art, which is a neural network model based on convolutional and LSTM layers. Finally, we demonstrate that including the textual dependency tree structure in the input feature space improves the f1-score of the gender classification task by 4% for the RusPersonality dataset and by 7% for the crowdsourced dataset on average. 
The resulting f1-scores of the developed models are 84% and 83%, respectively.</p></trans-abstract><kwd-group xml:lang="ru"><kwd>машинное обучение</kwd><kwd>искусственные нейронные сети</kwd><kwd>обработка естественного языка</kwd><kwd>автоматизированный анализ текстов</kwd><kwd>графовые нейронные сети</kwd><kwd>авторское профилирование</kwd><kwd>определение пола автора текста</kwd></kwd-group><kwd-group xml:lang="en"><kwd>machine learning</kwd><kwd>artificial neural networks</kwd><kwd>natural language processing</kwd><kwd>automated text analysis</kwd><kwd>graph neural networks</kwd><kwd>author profiling</kwd><kwd>author gender identification</kwd></kwd-group><funding-group xml:lang="ru"><funding-statement>Исследование выполнено при финансовой поддержке РФФИ в рамках научного проекта № 18-29-10084 “мк”</funding-statement></funding-group><funding-group xml:lang="en"><funding-statement>The study was carried out with the financial support of the Russian Foundation for Basic Research (RFBR) within the framework of scientific project No. 18-29-10084 “mk”</funding-statement></funding-group></article-meta></front><back><ref-list><title>References</title><ref id="cit1"><label>1</label><citation-alternatives><mixed-citation xml:lang="ru">Mikolov T., Sutskever I., Chen K., Corrado G. S., Dean J. Distributed representations of words and phrases and their compositionality. Advances in neural information processing systems. MIT Press. 2013. V. 2. P. 3111–3119.</mixed-citation><mixed-citation xml:lang="en">Mikolov T., Sutskever I., Chen K., Corrado G. S., Dean J. Distributed representations of words and phrases and their compositionality. Advances in neural information processing systems. MIT Press. 2013. V. 2. P. 3111–3119.</mixed-citation></citation-alternatives></ref><ref id="cit2"><label>2</label><citation-alternatives><mixed-citation xml:lang="ru">Greff K., Srivastava R. K., Koutník J., Steunebrink B. R., Bas R., Schmidhuber J. LSTM: A search space odyssey. IEEE transactions on neural networks and learning systems. IEEE. 2016. V. 28. № 10. P. 
2222–2232.</mixed-citation><mixed-citation xml:lang="en">Greff K., Srivastava R. K., Koutník J., Steunebrink B. R., Bas R., Schmidhuber J. LSTM: A search space odyssey. IEEE transactions on neural networks and learning systems. IEEE. 2016. V. 28. № 10. P. 2222–2232.</mixed-citation></citation-alternatives></ref><ref id="cit3"><label>3</label><citation-alternatives><mixed-citation xml:lang="ru">Hassan A., Mahmood A. Deep learning approach for sentiment analysis of short texts. Proceedings of 2017 3rd international conference on control, automation and robotics (ICCAR). IEEE. 2017. P. 705–710.</mixed-citation><mixed-citation xml:lang="en">Hassan A., Mahmood A. Deep learning approach for sentiment analysis of short texts. Proceedings of 2017 3rd international conference on control, automation and robotics (ICCAR). IEEE. 2017. P. 705–710.</mixed-citation></citation-alternatives></ref><ref id="cit4"><label>4</label><citation-alternatives><mixed-citation xml:lang="ru">Tai K. S., Socher R., Manning C. D. Improved semantic representations from tree-structured long short-term memory networks. In: arXiv preprint arXiv:1503.00075. 2015.</mixed-citation><mixed-citation xml:lang="en">Tai K. S., Socher R., Manning C. D. Improved semantic representations from tree-structured long short-term memory networks. In: arXiv preprint arXiv:1503.00075. 2015.</mixed-citation></citation-alternatives></ref><ref id="cit5"><label>5</label><citation-alternatives><mixed-citation xml:lang="ru">Miyazaki R., Komachi M. Japanese Sentiment Classification using a Tree-Structured Long Short-Term Memory with Attention. In: arXiv preprint arXiv:1704.00924. 2017.</mixed-citation><mixed-citation xml:lang="en">Miyazaki R., Komachi M. Japanese Sentiment Classification using a Tree-Structured Long Short-Term Memory with Attention. In: arXiv preprint arXiv:1704.00924. 
2017.</mixed-citation></citation-alternatives></ref><ref id="cit6"><label>6</label><citation-alternatives><mixed-citation xml:lang="ru">Sboev A., Moloshnikov I., Gudovskikh D., Rybka R. A comparison of Data Driven models of solving the task of gender identification of author in Russian language texts for cases without and with the gender deception. Journal of Physics: Conference Series. IOP Publishing. 2017. V. 937. № 1. P. 012046.</mixed-citation><mixed-citation xml:lang="en">Sboev A., Moloshnikov I., Gudovskikh D., Rybka R. A comparison of Data Driven models of solving the task of gender identification of author in Russian language texts for cases without and with the gender deception. Journal of Physics: Conference Series. IOP Publishing. 2017. V. 937. № 1. P. 012046.</mixed-citation></citation-alternatives></ref><ref id="cit7"><label>7</label><citation-alternatives><mixed-citation xml:lang="ru">Sboev A., Moloshnikov I., Gudovskikh D., Selivanov A., Rybka R., Litvinova T. Automatic gender identification of author of Russian text by machine learning and neural net algorithms in case of gender deception. Procedia computer science. 2018. № 123. P. 417–423.</mixed-citation><mixed-citation xml:lang="en">Sboev A., Moloshnikov I., Gudovskikh D., Selivanov A., Rybka R., Litvinova T. Automatic gender identification of author of Russian text by machine learning and neural net algorithms in case of gender deception. Procedia computer science. 2018. № 123. P. 417–423.</mixed-citation></citation-alternatives></ref><ref id="cit8"><label>8</label><citation-alternatives><mixed-citation xml:lang="ru">Sboev A., Moloshnikov I., Gudovskikh D., Selivanov A., Rybka R., Litvinova T. Deep Learning neural nets versus traditional machine learning in gender identification of authors of RusProfiling texts. Procedia computer science. 2018. № 123. P. 424–431.</mixed-citation><mixed-citation xml:lang="en">Sboev A., Moloshnikov I., Gudovskikh D., Selivanov A., Rybka R., Litvinova T. 
Deep Learning neural nets versus traditional machine learning in gender identification of authors of RusProfiling texts. Procedia computer science. 2018. № 123. P. 424–431.</mixed-citation></citation-alternatives></ref><ref id="cit9"><label>9</label><citation-alternatives><mixed-citation xml:lang="ru">Le Cun Y., Bengio Y. Convolutional networks for images, speech, and time series. The handbook of brain theory and neural networks. 1995. № 3361 (10).</mixed-citation><mixed-citation xml:lang="en">Le Cun Y., Bengio Y. Convolutional networks for images, speech, and time series. The handbook of brain theory and neural networks. 1995. № 3361 (10).</mixed-citation></citation-alternatives></ref><ref id="cit10"><label>10</label><citation-alternatives><mixed-citation xml:lang="ru">Grover A., Leskovec J. node2vec: Scalable feature learning for networks. Proceedings of the 22nd ACM SIGKDD international conference on Knowledge discovery and data mining 2016. ACM. 2016. P. 855–864.</mixed-citation><mixed-citation xml:lang="en">Grover A., Leskovec J. node2vec: Scalable feature learning for networks. Proceedings of the 22nd ACM SIGKDD international conference on Knowledge discovery and data mining 2016. ACM. 2016. P. 855–864.</mixed-citation></citation-alternatives></ref><ref id="cit11"><label>11</label><citation-alternatives><mixed-citation xml:lang="ru">Narayanan A., Chandramohan M., Venkatesan R., Chen L., Liu Y., Jaiswal S. graph2vec: Learning distributed representations of graphs. arXiv preprint arXiv:1707.05005. 2017.</mixed-citation><mixed-citation xml:lang="en">Narayanan A., Chandramohan M., Venkatesan R., Chen L., Liu Y., Jaiswal S. graph2vec: Learning distributed representations of graphs. arXiv preprint arXiv:1707.05005. 2017.</mixed-citation></citation-alternatives></ref><ref id="cit12"><label>12</label><citation-alternatives><mixed-citation xml:lang="ru">Kipf T.N., Welling M. Semi-supervised classification with graph convolutional networks. 
arXiv preprint arXiv:1609.02907. 2016.</mixed-citation><mixed-citation xml:lang="en">Kipf T.N., Welling M. Semi-supervised classification with graph convolutional networks. arXiv preprint arXiv:1609.02907. 2016.</mixed-citation></citation-alternatives></ref><ref id="cit13"><label>13</label><citation-alternatives><mixed-citation xml:lang="ru">Veličković P., Cucurull G., Casanova A., Romero A., Lio P., Bengio Y. Graph attention networks. arXiv preprint arXiv:1710.10903. 2017.</mixed-citation><mixed-citation xml:lang="en">Veličković P., Cucurull G., Casanova A., Romero A., Lio P., Bengio Y. Graph attention networks. arXiv preprint arXiv:1710.10903. 2017.</mixed-citation></citation-alternatives></ref><ref id="cit14"><label>14</label><citation-alternatives><mixed-citation xml:lang="ru">Xinyi Z., Chen L. Capsule graph neural network, 2018.</mixed-citation><mixed-citation xml:lang="en">Xinyi Z., Chen L. Capsule graph neural network, 2018.</mixed-citation></citation-alternatives></ref><ref id="cit15"><label>15</label><citation-alternatives><mixed-citation xml:lang="ru">Mikolov T., Sutskever I., Chen K., Corrado G. S., Dean J. Distributed representations of words and phrases and their compositionality. In Advances in neural information processing systems. 2013. P. 3111–3119.</mixed-citation><mixed-citation xml:lang="en">Mikolov T., Sutskever I., Chen K., Corrado G. S., Dean J. Distributed representations of words and phrases and their compositionality. In Advances in neural information processing systems. 2013. P. 3111–3119.</mixed-citation></citation-alternatives></ref><ref id="cit16"><label>16</label><citation-alternatives><mixed-citation xml:lang="ru">Shervashidze, N., Schweitzer, P., Jan van Leeuwen E., Mehlhorn K., Borgwardt K. M. Weisfeiler-lehman graph kernels. Journal of Machine Learning Research. 2011. P. 2539–2561.</mixed-citation><mixed-citation xml:lang="en">Shervashidze, N., Schweitzer, P., Jan van Leeuwen E., Mehlhorn K., Borgwardt K. M. 
Weisfeiler-lehman graph kernels. Journal of Machine Learning Research. 2011. P. 2539–2561.</mixed-citation></citation-alternatives></ref><ref id="cit17"><label>17</label><citation-alternatives><mixed-citation xml:lang="ru">Goldberg Y., Levy O. Word2vec Explained: deriving Mikolov et al.'s negative-sampling word-embedding method. arXiv preprint arXiv:1402.3722. 2014.</mixed-citation><mixed-citation xml:lang="en">Goldberg Y., Levy O. Word2vec Explained: deriving Mikolov et al.'s negative-sampling word-embedding method. arXiv preprint arXiv:1402.3722. 2014.</mixed-citation></citation-alternatives></ref><ref id="cit18"><label>18</label><citation-alternatives><mixed-citation xml:lang="ru">Straka M., Straková J. Tokenizing, POS Tagging, Lemmatizing and Parsing UD 2.0 with UDPipe. Proceedings of the CoNLL 2017 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies. Association for Computational Linguistics. Vancouver, Canada. 2017. P. 88–99.</mixed-citation><mixed-citation xml:lang="en">Straka M., Straková J. Tokenizing, POS Tagging, Lemmatizing and Parsing UD 2.0 with UDPipe. Proceedings of the CoNLL 2017 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies. Association for Computational Linguistics. Vancouver, Canada. 2017. P. 88–99.</mixed-citation></citation-alternatives></ref><ref id="cit19"><label>19</label><citation-alternatives><mixed-citation xml:lang="ru">Rybka R., Sboev A., Moloshnikov I., Gudovskikh D. Morpho-syntactic parsing based on neural networks and corpus data. Artificial Intelligence and Natural Language and Information Extraction, Social Media and Web Search FRUCT Conference (AINL-ISMW FRUCT). St. Petersburg. 2015. P. 89–95.</mixed-citation><mixed-citation xml:lang="en">Rybka R., Sboev A., Moloshnikov I., Gudovskikh D. Morpho-syntactic parsing based on neural networks and corpus data. 
Artificial Intelligence and Natural Language and Information Extraction, Social Media and Web Search FRUCT Conference (AINL-ISMW FRUCT). St. Petersburg. 2015. P. 89–95.</mixed-citation></citation-alternatives></ref><ref id="cit20"><label>20</label><citation-alternatives><mixed-citation xml:lang="ru">Springenberg J. T., Dosovitskiy A., Brox T., Riedmiller M. Striving for simplicity: The all convolutional net. 2014. arXiv preprint, arXiv:1412.6806.</mixed-citation><mixed-citation xml:lang="en">Springenberg J. T., Dosovitskiy A., Brox T., Riedmiller M. Striving for simplicity: The all convolutional net. 2014. arXiv preprint, arXiv:1412.6806.</mixed-citation></citation-alternatives></ref><ref id="cit21"><label>21</label><citation-alternatives><mixed-citation xml:lang="ru">Srivastava N., Hinton G., Krizhevsky A., Sutskever I., Salakhutdinov R. Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research. 2014. № 15 (1). P. 1929–1958.</mixed-citation><mixed-citation xml:lang="en">Srivastava N., Hinton G., Krizhevsky A., Sutskever I., Salakhutdinov R. Dropout: a simple way to prevent neural networks from overfitting. The journal of machine learning research. 2014. № 15 (1). P. 1929–1958.</mixed-citation></citation-alternatives></ref><ref id="cit22"><label>22</label><citation-alternatives><mixed-citation xml:lang="ru">Smith L. N. Cyclical learning rates for training neural networks. IEEE Proceedings of the Winter Conference on Applications of Computer Vision (WACV). IEEE. 2017. P. 464–472.</mixed-citation><mixed-citation xml:lang="en">Smith L. N. Cyclical learning rates for training neural networks. IEEE Proceedings of the Winter Conference on Applications of Computer Vision (WACV). IEEE. 2017. P. 464–472.</mixed-citation></citation-alternatives></ref></ref-list><fn-group><fn fn-type="conflict"><p>The authors declare that there are no conflicts of interest present.</p></fn></fn-group></back></article>
